diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000000..16de094281 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,3 @@ +cmake_minimum_required(VERSION 2.8.12) # add_compile_options(), used by bin/dsn.cmake, first shipped in CMake 2.8.12 +include(bin/dsn.cmake) +dsn_setup_runtime() diff --git a/LICENSE b/LICENSE index b8b569d774..623364765d 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,8 @@ The MIT License (MIT) -Copyright (c) 2015 Microsoft +Copyright (c) Microsoft Corporation + +All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/Release Note.txt b/Release Note.txt new file mode 100644 index 0000000000..2a6f64dacc --- /dev/null +++ b/Release Note.txt @@ -0,0 +1,33 @@ + +Robust Distributed System Nucleus (rDSN) is an open framework for quickly building robust and high performance distributed systems. Besides Programmability and High-Performance that many other frameworks focus on, rDSN provides a holistic solution to also systematically and (semi-)transparently address topics important for robustness that occur throughout the whole lifetime of a distributed system's development and operation, such as test, debug, deployment, how to scale-up/out, and how to achieve high-availability. The system has been used and validated in production inside Microsoft. The current release is in C++ and can run on Ubuntu, OS X, and Windows. + +Release notes @ 4/15/2015, Version 0.9 + +- Languages and platforms + . C++ + . Ubuntu14.04/OS X Yosemite/Windows 8.1 + +- What's New + + . src/core: the kernel of rDSN, which defines the Service API and the Tool API, and translates the former to the latter. + . Service API: RPC, thread/tasking, synchronizing (lock, semaphore, etc.), asynchronous file IO, and environment (time and random), and the service_app abstraction. + . 
Tool API: component abstractions for network, task queue, task worker, lock, rw_lock, semaphore, aio_provider, env_provider, message_parser, perf_counter, logging_provider, and the toollet/tool_app abstractions. + + . src/dev: syntactic sugar for application development, including the serverlet abstraction and the typed interface for RPC/task calls. + + . src/tools/common: default component provider instances for locks, logger, aio_provider, network (adapted from boost asio), task queue, etc. It also implements several simple tools such as tracer, profiler, and fault injector. + + . src/tools/simulator: simulation tool implementation which virtualizes time and random, and enables cooperative scheduling of the tasks. Together with the fault injector tool in rDSN.tools.common, it supports random tests with faults, and deterministic replay to reproduce the bugs when exposed. + + . src/dist/failure_detector: a perfect failure detector implementation based on rDSN's programming model, which also serves as a common distributed system component. + + . src/apps/echo: a simple echo service as the application example. + + . src/apps/replication: a replication framework based on rDSN's programming model, which also serves as a common distributed system component. + + . codegen: code generator tool to leverage Apache Thrift/Google Protocol Buffer to generate the application code based on a given IDL file. + + . codegen/libs: code generation templates. 
+ + + diff --git a/bin/dsn.cg.bat b/bin/dsn.cg.bat new file mode 100644 index 0000000000..eba78c7e13 --- /dev/null +++ b/bin/dsn.cg.bat @@ -0,0 +1,4 @@ +@ECHO OFF +FOR /f %%i IN ("%0") DO SET CODEGEN_ROOT=%%~dpi +CALL php -f %CODEGEN_ROOT%\dsn.generate_code.php %1 %2 %3 %4 %5 %6 %7 %8 %9 +:EOF diff --git a/bin/dsn.cg.sh b/bin/dsn.cg.sh new file mode 100755 index 0000000000..a9db7046f1 --- /dev/null +++ b/bin/dsn.cg.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +SOURCE="${BASH_SOURCE[0]}" +while [ -h "$SOURCE" ]; do # resolve $SOURCE until the file is no longer a symlink + DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )" + SOURCE="$(readlink "$SOURCE")" + [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE" # if $SOURCE was a relative symlink, we need to resolve it relative to the path where the symlink file was located +done +CODEGEN_ROOT="$( cd -P "$( dirname "$SOURCE" )" && pwd )" + +php "$CODEGEN_ROOT/dsn.generate_code.php" "$@" # quote the script path and forward every argument verbatim, so spaces survive and nothing past the ninth argument is dropped diff --git a/bin/dsn.cmake b/bin/dsn.cmake new file mode 100644 index 0000000000..2d9c65d79e --- /dev/null +++ b/bin/dsn.cmake @@ -0,0 +1,338 @@ +function(ms_add_library PROJ_TYPE PROJ_NAME PROJ_SRC DO_INSTALL) + if(NOT((PROJ_TYPE STREQUAL "STATIC") OR (PROJ_TYPE STREQUAL "SHARED") OR (PROJ_TYPE STREQUAL "MODULE"))) + message(FATAL_ERROR "Invalid project type") + endif() + + if(PROJ_NAME STREQUAL "") + message(FATAL_ERROR "Invalid project name") + endif() + + if(MSVC) + add_definitions(-D_LIB) + endif() + + add_library(${PROJ_NAME} ${PROJ_TYPE} ${PROJ_SRC}) + if(DO_INSTALL) + install(TARGETS ${PROJ_NAME} DESTINATION lib) + endif() +endfunction(ms_add_library PROJ_TYPE PROJ_NAME PROJ_SRC DO_INSTALL) + +function(ms_add_executable PROJ_NAME PROJ_SRC INPUT_LIBS BINPLACE_FILES DO_INSTALL) + if(PROJ_NAME STREQUAL "") + message(FATAL_ERROR "Invalid project name") + endif() + + set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${PROJ_NAME}") + set(INSTALL_BINPLACE_DIR "bin/${PROJ_NAME}") + + add_executable(${PROJ_NAME} 
${PROJ_SRC}) + target_link_libraries(${PROJ_NAME} LINK_PUBLIC ${INPUT_LIBS}) + + set(BINPLACE_DIR "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/") + foreach(BF ${BINPLACE_FILES}) + add_custom_command( + TARGET ${PROJ_NAME} + POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy ${BF} "${BINPLACE_DIR}" + ) + if(DO_INSTALL) + install(FILES ${BF} DESTINATION "${INSTALL_BINPLACE_DIR}") + endif() + endforeach() + + if(DO_INSTALL) + install(TARGETS ${PROJ_NAME} DESTINATION "${INSTALL_BINPLACE_DIR}") + endif() +endfunction(ms_add_executable PROJ_NAME PROJ_SRC INPUT_LIBS BINPLACE_FILES DO_INSTALL) + +macro(ms_add_compiler_flags LANGUAGES SUFFIXES FLAGS) + foreach(LANG ${LANGUAGES}) + foreach(SUFFIX ${SUFFIXES}) + if(SUFFIX STREQUAL "") + set(SUFFIX "") + else() + string(TOUPPER ${SUFFIX} SUFFIX) + set(SUFFIX "_${SUFFIX}") + endif() + set(FLAG_VAR "CMAKE_${LANG}_FLAGS${SUFFIX}") + set(${FLAG_VAR} "${${FLAG_VAR}} ${FLAGS}" PARENT_SCOPE) + message(STATUS ${FLAG_VAR} ":" ${${FLAG_VAR}}) + endforeach() + endforeach() +endmacro(ms_add_compiler_flags LANGUAGES SUFFIXES FLAGS) + +function(ms_link_static_runtime FLAG_VAR) + if(MSVC) + if(${FLAG_VAR} MATCHES "/MD") + string(REGEX REPLACE "/MD" "/MT" "${FLAG_VAR}" "${${FLAG_VAR}}") + #Save persistently + set(${FLAG_VAR} ${${FLAG_VAR}} CACHE STRING "" FORCE) + endif() + endif() +endfunction(ms_link_static_runtime) + +function(ms_replace_compiler_flags REPLACE_OPTION) + set(SUFFIXES "") + if((NOT DEFINED CMAKE_CONFIGURATION_TYPES) OR (CMAKE_CONFIGURATION_TYPES STREQUAL "")) + #set(SUFFIXES "_DEBUG" "_RELEASE" "_MINSIZEREL" "_RELWITHDEBINFO") + if((DEFINED CMAKE_BUILD_TYPE) AND (NOT (CMAKE_BUILD_TYPE STREQUAL ""))) + string(TOUPPER ${CMAKE_BUILD_TYPE} SUFFIXES) + set(SUFFIXES "_${SUFFIXES}") + endif() + else() + foreach(SUFFIX ${CMAKE_CONFIGURATION_TYPES}) + string(TOUPPER ${SUFFIX} SUFFIX) + set(SUFFIXES ${SUFFIXES} "_${SUFFIX}") + endforeach() + endif() + + foreach(SUFFIX "" ${SUFFIXES}) + foreach(LANG C CXX) + set(FLAG_VAR 
"CMAKE_${LANG}_FLAGS${SUFFIX}") + if(${REPLACE_OPTION} STREQUAL "STATIC_LINK") + ms_link_static_runtime(${FLAG_VAR}) + endif() + endforeach() + #message(STATUS ${FLAG_VAR} ":" ${${FLAG_VAR}}) + endforeach() +endfunction(ms_replace_compiler_flags REPLACE_OPTION) + +function(ms_check_cxx11_support) + if(UNIX) + include(CheckCXXCompilerFlag) + CHECK_CXX_COMPILER_FLAG("-std=c++11" COMPILER_SUPPORTS_CXX11) + else() + if(MSVC_VERSION LESS 1700) + set(COMPILER_SUPPORTS_CXX11 0) + else() + set(COMPILER_SUPPORTS_CXX11 1) + endif() + endif() + + if(COMPILER_SUPPORTS_CXX11) + else() + message(FATAL_ERROR "You need a compiler with C++11 support.") + endif() +endfunction(ms_check_cxx11_support) + + +function(dsn_add_library PROJ_NAME) + if((NOT DEFINED DSN_RECURSIVE_SRC) OR (NOT DSN_RECURSIVE_SRC)) + set(MY_GLOB_OPTION "GLOB") + else() + set(MY_GLOB_OPTION "GLOB_RECURSE") + endif() + + file(${MY_GLOB_OPTION} + PROJ_SRC + "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/*.c" + "${CMAKE_CURRENT_SOURCE_DIR}/*.h" + ) + set(PROJ_SRC ${PROJ_SRC} ${DSN_EXTRA_SRC}) + ms_add_library("STATIC" ${PROJ_NAME} "${PROJ_SRC}" 1) +endfunction(dsn_add_library) + +function(dsn_add_executable PROJ_NAME BINPLACE_FILES) + if((NOT DEFINED DSN_RECURSIVE_SRC) OR (NOT DSN_RECURSIVE_SRC)) + set(MY_GLOB_OPTION "GLOB") + else() + set(MY_GLOB_OPTION "GLOB_RECURSE") + endif() + + file(${MY_GLOB_OPTION} + PROJ_SRC + "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/*.c" + "${CMAKE_CURRENT_SOURCE_DIR}/*.h" + ) + set(PROJ_SRC ${PROJ_SRC} ${DSN_EXTRA_SRC}) + set(INPUT_LIBS ${DSN_EXTRA_LIBS} ${DSN_LIBS}) + ms_add_executable(${PROJ_NAME} "${PROJ_SRC}" "${INPUT_LIBS}" "${BINPLACE_FILES}" 0) +endfunction(dsn_add_executable PROJ_NAME BINPLACE_FILES) + +function(dsn_setup_compiler_flags) + ms_replace_compiler_flags("STATIC_LINK") + + if(UNIX) + add_compile_options(-std=c++11) + if(DEFINED DSN_PEDANTIC) + add_compile_options(-Werror) + endif() + elseif(MSVC) + 
add_definitions(-D_CONSOLE) + add_definitions(-D_CRT_SECURE_NO_WARNINGS) + add_definitions(-D_CRT_NONSTDC_NO_DEPRECATE) + add_definitions(-D_WINSOCK_DEPRECATED_NO_WARNINGS=1) + add_definitions(-D_WIN32_WINNT=0x0501) + if(DEFINED DSN_PEDANTIC) + add_compile_options(-WX) + endif() + endif() +endfunction(dsn_setup_compiler_flags) + +function(ms_setup_boost STATIC_LINK PACKAGES) + set(Boost_USE_MULTITHREADED ON) + if(STATIC_LINK) + set(Boost_USE_STATIC_LIBS ON) + set(Boost_USE_STATIC_RUNTIME ON) + else() + set(Boost_USE_STATIC_LIBS OFF) + set(Boost_USE_STATIC_RUNTIME OFF) + endif() + + find_package(Boost COMPONENTS ${PACKAGES} REQUIRED) + + set(BOOST_REQUIRED_LIBS "") + foreach(PACKAGE ${PACKAGES}) + string(TOUPPER ${PACKAGE} PACKAGE) + set(BOOST_REQUIRED_LIBS ${BOOST_REQUIRED_LIBS} ${Boost_${PACKAGE}_LIBRARY}) + endforeach() + set(BOOST_REQUIRED_LIBS ${BOOST_REQUIRED_LIBS} CACHE STRING "Required boost packages" FORCE) + +endfunction(ms_setup_boost STATIC_LINK PACKAGES) + +function(dsn_setup_packages) + set(BOOST_PACKAGES + ${DSN_EXTRA_BOOST_PACKAGES} + thread + regex + system + filesystem + chrono + date_time + ) + ms_setup_boost(1 "${BOOST_PACKAGES}") + + find_package(Threads REQUIRED) + + set(DSN_SYSTEM_LIBS "") + set(DSN_CORE_LIBS "") + set(DSN_LIBS "") + set(DSN_CORE_TARGETS + dsn.failure_detector + dsn.tools.simulator + dsn.tools.common + dsn.dev + dsn.core + ) + + if(UNIX AND (NOT APPLE)) + set(DSN_SYSTEM_LIBS ${DSN_SYSTEM_LIBS} rt) + endif() + + set(DSN_SYSTEM_LIBS + ${DSN_SYSTEM_LIBS} + ${CMAKE_THREAD_LIBS_INIT} + ${BOOST_REQUIRED_LIBS} + ) + + if(DSN_BUILD_RUNTIME) + set(DSN_LIBS ${DSN_CORE_TARGETS}) + else() + ######Useless####### + if(MSVC) + set(RLEXT ".lib") + else() + set(RLEXT ".a") + endif() + + foreach(RL ${DSN_EXTRA_TARGETS}) + #set(DSN_LIBS ${DSN_LIBS} "lib${RL}${RLEXT}") + endforeach() + + foreach(RL ${DSN_CORE_TARGETS}) + #set(DSN_LIBS ${DSN_LIBS} "lib${RL}${RLEXT}") + endforeach() + #################### + + set(DSN_LIBS ${DSN_CORE_TARGETS}) + 
endif() + + set(DSN_LIBS ${DSN_LIBS} ${DSN_SYSTEM_LIBS} CACHE STRING "rDSN libs" FORCE) +endfunction(dsn_setup_packages) + +function(dsn_set_output_path) + set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/bin CACHE STRING "" FORCE) + set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib CACHE STRING "" FORCE) + set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY} CACHE STRING "" FORCE) +endfunction(dsn_set_output_path) + +function(dsn_setup_version) + set(DSN_VERSION_MAJOR 1 CACHE STRING "rDSN major version" FORCE) + set(DSN_VERSION_MINOR 0 CACHE STRING "rDSN minor version" FORCE) + set(DSN_VERSION_PATCH 0 CACHE STRING "rDSN patch version" FORCE) +endfunction(dsn_setup_version) + +function(dsn_setup_include_path) + include_directories(${BOOST_INCLUDEDIR}) + if(DSN_BUILD_RUNTIME) + include_directories(${CMAKE_SOURCE_DIR}/include) + else() + include_directories(${DSN_ROOT}/include) + endif() + include_directories(${DSN_EXTRA_INCLUDEDIR}) +endfunction(dsn_setup_include_path) + +function(dsn_setup_link_path) + link_directories(${BOOST_LIBRARYDIR} ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY} ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}) + if(DSN_BUILD_RUNTIME) + else() + link_directories(${DSN_ROOT}/lib) + endif() + link_directories(${DSN_EXTRA_LIBRARYDIR}) +endfunction(dsn_setup_link_path) + +function(dsn_setup_install) + if(DSN_BUILD_RUNTIME) + install(DIRECTORY include/ DESTINATION include) + install(DIRECTORY bin/ DESTINATION bin) + if(MSVC) + install(FILES "bin/dsn.cg.bat" DESTINATION bin) + else() + install(PROGRAMS "bin/dsn.cg.sh" DESTINATION bin) + endif() + endif() +endfunction(dsn_setup_install) + +function(dsn_add_pseudo_projects) + if(DSN_BUILD_RUNTIME AND MSVC_IDE) + file(GLOB_RECURSE + PROJ_SRC + "${CMAKE_SOURCE_DIR}/include/*.h" + ) + add_custom_target("dsn.include" SOURCES ${PROJ_SRC}) + endif() +endfunction(dsn_add_pseudo_projects) + +function(dsn_common_setup) + if(NOT (UNIX OR WIN32)) + message(FATAL_ERROR "Only Unix and Windows are 
supported.") + endif() + + if(CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR) + message(FATAL_ERROR "In-source builds are not allowed.") + endif() + + if(NOT DEFINED DSN_BUILD_RUNTIME) + message(FATAL_ERROR "DSN_BUILD_RUNTIME is not defined.") + endif() + + #set(BUILD_SHARED_LIBS OFF) + dsn_setup_version() + ms_check_cxx11_support() + dsn_setup_compiler_flags() + dsn_setup_packages() + dsn_setup_include_path() + dsn_setup_link_path() + dsn_set_output_path() + dsn_setup_install() +endfunction(dsn_common_setup) + +function(dsn_setup_runtime) + project(dsn C CXX) + set(DSN_BUILD_RUNTIME 1) + dsn_common_setup() + dsn_add_pseudo_projects() + add_subdirectory(src) +endfunction(dsn_setup_runtime) diff --git a/bin/dsn.generate_code.php b/bin/dsn.generate_code.php new file mode 100644 index 0000000000..1770f0c010 --- /dev/null +++ b/bin/dsn.generate_code.php @@ -0,0 +1,213 @@ += 4) + $g_mode = $argv[3]; +else + $g_mode = "single"; + +if ($g_mode != "single" && $g_mode != "replication") +{ + echo "invalid mode '$g_mode'".PHP_EOL; + usage(); + exit(0); +} + +if (!file_exists($g_idl)) +{ + echo "input file '". 
$g_idl ."' is not found.".PHP_EOL; + exit(0); +} +else +{ + if (strlen($g_idl) > strlen(".thrift") + && substr($g_idl, strlen($g_idl) - strlen(".thrift")) == ".thrift") + { + $g_idl_type = "thrift"; + $g_idl_post = ".php"; + } + else if (strlen($g_idl) > strlen(".proto") + && substr($g_idl, strlen($g_idl) - strlen(".proto")) == ".proto") + { + $g_idl_type = "proto"; + $g_idl_post = ".pb.php"; + } + else + { + echo "unknown idl type for input file '".$g_idl."'".PHP_EOL; + exit(0); + } +} + +$pos = strrpos($g_idl, "\\"); +$pos2 = strrpos($g_idl, "/"); +if ($pos === FALSE && $pos2 === FALSE) // strict test: strrpos() returns 0 for a separator at offset 0, which a loose comparison mistakes for "not found" +{ + $g_program = substr($g_idl, 0, strlen($g_idl) - strlen($g_idl_type) - 1); +} +else if ($pos !== FALSE && ($pos2 === FALSE || $pos > $pos2)) // the backslash is the last separator in the path +{ + $g_program = substr($g_idl, $pos + 1, strlen($g_idl) - $pos - 1 - strlen($g_idl_type) - 1); +} +else +{ + $g_program = substr($g_idl, $pos2 + 1, strlen($g_idl) - $pos2 - 1 - strlen($g_idl_type) - 1); +} + +$g_idl_php = $g_out_dir."/".$g_program.$g_idl_post; + +if (!file_exists($g_out_dir)) +{ + if (!mkdir($g_out_dir, 0777, true)) // recursive, so a nested output path is created in one go + { + echo "create output directory '". $g_out_dir ."' failed.".PHP_EOL; + exit(0); + } + else + { + echo "output directory '". $g_out_dir ."' created.".PHP_EOL; + } +} + +// generate service definition file from input idl file using the code generation tools +switch ($g_idl_type) +{ +case "thrift": + { + $command = $g_cg_dir."/thrift --gen rdsn -out ".$g_out_dir." ".$g_idl; + echo "exec: ".$command.PHP_EOL; + system($command); + if (!file_exists($g_idl_php)) + { + echo "failed invoke thrift tool to generate '".$g_idl_php."'".PHP_EOL; + exit(0); + } + } + break; +case "proto": + { + $command = $g_cg_dir."/protoc --rdsn_out=".$g_out_dir." ".$g_idl; + echo "exec: ".$command.PHP_EOL; + system($command); + if (!file_exists($g_idl_php)) + { + echo "failed invoke protoc tool to generate '".$g_idl_php."'".PHP_EOL; + exit(0); + } + } + break; +default: + echo "idl type '". 
$g_idl_type ."' not supported yet!".PHP_EOL; + exit(0); +} + +// load annotations when they are present +if (file_exists($g_idl.".annotations")) +{ + $annotations = parse_ini_file($g_idl.".annotations", true); + if (FALSE == $annotations) + { + echo "read annotation file $g_idl.annotations failed".PHP_EOL; + exit(0); + } + + $as = "add_annotations(Array(".PHP_EOL; + foreach ($annotations as $s => $kvs) + { + $as .= "\t\"".$s."\" => Array(".PHP_EOL; + foreach($kvs as $k => $v) + { + $as .= "\t\t\"".$k."\" => \"". $v ."\",".PHP_EOL; + } + $as .= "\t),".PHP_EOL; + } + $as .= "));".PHP_EOL; + $as .= "?>".PHP_EOL; + + file_put_contents($g_idl_php, $as, FILE_APPEND); +} + +function generate_files_from_dir($dr) +{ + global $g_cg_libs; + global $g_idl_php; + global $g_program; + global $g_out_dir; + global $g_idl_type; + + foreach (scandir($dr) as $template) + { + if ($template == "type.php" + || $template == "." + || $template == ".." + ) + continue; + + if (is_dir($dr."/".$template)) + continue; + + if ($template == "config.ini.php" + || $template == "CMakeLists.txt.php" + ) + $output_file = $g_out_dir."/".substr($template, 0, strlen($template)-4); + else + $output_file = $g_out_dir."/".$g_program.".".substr($template, 0, strlen($template)-4); + + $command = "php -f ".$dr."/".$template + ." ".$g_cg_libs."/type.php" + ." ".$g_idl_php + ." ".$g_program + ." ".$g_idl_type + ." 
>".$output_file + ; + + //echo "exec: ".$command.PHP_EOL; + system($command); + if (!file_exists($output_file)) + { + echo "failed to generate '".$output_file."'".PHP_EOL; + exit(0); + } + else + { + echo "generate '".$output_file."' successfully!".PHP_EOL; + } + } +} + +generate_files_from_dir($g_cg_libs); +generate_files_from_dir($g_cg_libs."/".$g_mode); + +?> diff --git a/bin/dsn.templates/code.definition.h.php b/bin/dsn.templates/code.definition.h.php new file mode 100644 index 0000000000..33ad952caa --- /dev/null +++ b/bin/dsn.templates/code.definition.h.php @@ -0,0 +1,27 @@ + +# pragma once +# include +# include "name?>.types.h" + +get_cpp_namespace_begin().PHP_EOL; + +foreach ($_PROG->services as $svc) +{ + echo "\t// define RPC task code for service '". $svc->name ."'". PHP_EOL; + foreach ($svc->functions as $f) + { + echo "\tDEFINE_TASK_CODE_RPC(". $f->get_rpc_code() + . ", ::dsn::TASK_PRIORITY_COMMON, ::dsn::THREAD_POOL_DEFAULT)".PHP_EOL; + } +} +echo "\t// test timer task code".PHP_EOL; +echo "\tDEFINE_TASK_CODE(". $_PROG->get_test_task_code() + . 
", ::dsn::TASK_PRIORITY_COMMON, ::dsn::THREAD_POOL_DEFAULT)".PHP_EOL; + +echo $_PROG->get_cpp_namespace_end().PHP_EOL; +?> diff --git a/bin/dsn.templates/replication/CMakeLists.txt.php b/bin/dsn.templates/replication/CMakeLists.txt.php new file mode 100644 index 0000000000..9c0b80a9cb --- /dev/null +++ b/bin/dsn.templates/replication/CMakeLists.txt.php @@ -0,0 +1,27 @@ + +cmake_minimum_required(VERSION 2.8.8) + +set(DSN_ROOT "") +if(NOT EXISTS "${DSN_ROOT}/") + message(FATAL_ERROR "Please make sure that ${DSN_ROOT} exists.") +endif() + +include("${DSN_ROOT}/bin/dsn.cmake") + +set(DSN_APP_TARGET "name?>") +project(${DSN_APP_TARGET} C CXX) +set(DSN_BUILD_RUNTIME 0) +set(DSN_EXTRA_BOOST_PACKAGES "") +set(DSN_EXTRA_INCLUDEDIR "") +set(DSN_EXTRA_LIBRARYDIR "") +set(DSN_EXTRA_LIBS dsn.replication dsn.replication.meta_server dsn.replication.clientlib dsn.failure_detector) +set(DSN_EXTRA_SRC "") +dsn_common_setup() +file(GLOB BINPLACE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/*.ini") +dsn_add_executable(${DSN_APP_TARGET} "${BINPLACE_FILES}") diff --git a/bin/dsn.templates/replication/app.example.h.php b/bin/dsn.templates/replication/app.example.h.php new file mode 100644 index 0000000000..f1ebf242b2 --- /dev/null +++ b/bin/dsn.templates/replication/app.example.h.php @@ -0,0 +1,92 @@ + +# pragma once +# include ".client.h" +# include ".server.h" + +get_cpp_namespace_begin()?> + +// client app example +class name?>_client_app : public ::dsn::service::service_app, public virtual ::dsn::service::servicelet +{ +public: + name?>_client_app(::dsn::service_app_spec* s) + : ::dsn::service::service_app(s) + { +services as $svc) { ?> + _name?>_client = nullptr; + + } + + ~name?>_client_app() + { + stop(); + } + + virtual ::dsn::error_code start(int argc, char** argv) + { + if (argc < 2) + return ::dsn::ERR_INVALID_PARAMETERS; + + std::vector<::dsn::end_point> meta_servers; + auto cf = ::dsn::service::service_app::config(); + 
::dsn::replication::replication_app_client_base::load_meta_servers(cf, meta_servers); + +services as $svc) { ?> + _name?>_client = new name?>_client(meta_servers, argv[1]); + + _timer = ::dsn::service::tasking::enqueue(get_test_task_code()?>, this, &name?>_client_app::on_test_timer, 0, 0, 1000); + return ::dsn::ERR_SUCCESS; + } + + virtual void stop(bool cleanup = false) + { + _timer->cancel(true); +services as $svc) { ?> + if (_name?>_client != nullptr) + { + delete _name?>_client; + _name?>_client = nullptr; + } + + } + + void on_test_timer() + { +services as $svc) +{ + echo "\t\t// test for service '". $svc->name ."'". PHP_EOL; + foreach ($svc->functions as $f) +{?> + { + get_first_param()->get_cpp_type()?> req; +is_one_way()) { ?> + _name?>_client->name?>(req); + + //sync: + get_cpp_return_type()?> resp; + auto err = _name?>_client->name?>(req, resp); + std::cout << "call get_rpc_code()?> end, return " << err.to_string() << std::endl; + //async: + //_name?>_client->begin_name?>(req); + + } + + } + +private: + ::dsn::task_ptr _timer; + ::dsn::end_point _server; + +services as $svc) { ?> + name?>_client *_name?>_client; + +}; + +get_cpp_namespace_end()?> diff --git a/bin/dsn.templates/replication/client.h.php b/bin/dsn.templates/replication/client.h.php new file mode 100644 index 0000000000..41fb4cbad4 --- /dev/null +++ b/bin/dsn.templates/replication/client.h.php @@ -0,0 +1,146 @@ + +# pragma once +# include +# include ".code.definition.h" +# include + +get_cpp_namespace_begin()?> + +services as $svc) { ?> +class name?>_client + : public ::dsn::replication::replication_app_client_base +{ +public: + name?>_client( + const std::vector& meta_servers, + const char* app_name) + : ::dsn::replication::replication_app_client_base(meta_servers, app_name) + { + } + + virtual ~name?>_client() {} + + // from requests to partition index + // PLEASE DO RE-DEFINE THEM IN A SUB CLASS!!! 
+functions as $f) + $keys[$f->get_first_param()->get_cpp_type()] = 1; + + +foreach ($keys as $k => $v) +{ + echo "\tvirtual int get_partition_index(const ".$k."& key) { return 0;};".PHP_EOL; +} +?> +functions as $f) { ?> + + // ---------- call get_rpc_code()?> ------------ +is_one_way()) {?> + void name?>(const get_first_param()->get_cpp_type()?>& get_first_param()->name?>) + { + ::dsn::replication::replication_app_client_base::is_write ? "write":"read"?><get_first_param()->get_cpp_type()?>, get_cpp_return_type()?>>( + get_partition_index(get_first_param()->name?>), + get_rpc_code()?>, + get_first_param()->name?>, + nullptr, + nullptr + ); + } + + // - synchronous + ::dsn::error_code name?>( + const get_first_param()->get_cpp_type()?>& get_first_param()->name?>, + __out_param get_cpp_return_type()?>& resp, + int timeout_milliseconds = 0 + ) + { + auto resp_task = ::dsn::replication::replication_app_client_base::is_write ? "write":"read"?><get_first_param()->get_cpp_type()?>, get_cpp_return_type()?>>( + get_partition_index(get_first_param()->name?>), + get_rpc_code()?>, + get_first_param()->name?>, + nullptr, + nullptr, + nullptr, + timeout_milliseconds + ); + resp_task->wait(); + if (resp_task->error() == ::dsn::ERR_SUCCESS) + { + unmarshall(resp_task->get_response()->reader(), resp); + } + return resp_task->error(); + } + + // - asynchronous with on-stack get_first_param()->get_cpp_type()?> and get_cpp_return_type()?> + ::dsn::rpc_response_task_ptr begin_name?>( + const get_first_param()->get_cpp_type()?>& get_first_param()->name?>, + void* context = nullptr, + int timeout_milliseconds = 0, + int reply_hash = 0 + ) + { + return ::dsn::replication::replication_app_client_base::is_write ? 
"write":"read"?><name?>_client, get_first_param()->get_cpp_type()?>, get_cpp_return_type()?>>( + get_partition_index(get_first_param()->name?>), + get_rpc_code()?>, + get_first_param()->name?>, + this, + &name?>_client::end_name?>, + context, + timeout_milliseconds, + reply_hash + ); + } + + virtual void end_name?>( + ::dsn::error_code err, + const get_cpp_return_type()?>& resp, + void* context) + { + if (err != ::dsn::ERR_SUCCESS) std::cout << "reply get_rpc_code()?> err : " << err.to_string() << std::endl; + else + { + std::cout << "reply get_rpc_code()?> ok" << std::endl; + } + } + + // - asynchronous with on-heap std::shared_ptr<get_first_param()->get_cpp_type()?>> and std::shared_ptr<get_cpp_return_type()?>> + ::dsn::rpc_response_task_ptr begin_name?>2( + std::shared_ptr<get_first_param()->get_cpp_type()?>>& get_first_param()->name?>, + int timeout_milliseconds = 0, + int reply_hash = 0 + ) + { + return ::dsn::replication::replication_app_client_base::is_write ? "write":"read"?><name?>_client, get_first_param()->get_cpp_type()?>, get_cpp_return_type()?>>( + get_partition_index(*get_first_param()->name?>), + get_rpc_code()?>, + get_first_param()->name?>, + this, + &name?>_client::end_name?>2, + timeout_milliseconds, + reply_hash + ); + } + + virtual void end_name?>2( + ::dsn::error_code err, + std::shared_ptr<get_first_param()->get_cpp_type()?>>& get_first_param()->name?>, + std::shared_ptr<get_cpp_return_type()?>>& resp) + { + if (err != ::dsn::ERR_SUCCESS) std::cout << "reply get_rpc_code()?> err : " << err.to_string() << std::endl; + else + { + std::cout << "reply get_rpc_code()?> ok" << std::endl; + } + } + + + +}; + + +get_cpp_namespace_end()?> diff --git a/bin/dsn.templates/replication/config.ini.php b/bin/dsn.templates/replication/config.ini.php new file mode 100644 index 0000000000..20d9b6a65a --- /dev/null +++ b/bin/dsn.templates/replication/config.ini.php @@ -0,0 +1,131 @@ + +[apps.metaserver] +name = meta +type = meta +arguments = +ports = 34601 +run 
= true +count = 1 + +[apps.replicaserver] +name = replica +type = replica +arguments = +ports = 34801 +run = true +count = 3 + +[apps.client] +name = client +type = client +arguments = name?>.instance0 +run = true +count = 2 + +[core] + +tool = simulator +;tool = nativerun +;toollets = tracer +;toollets = tracer, profiler, fault_injector +pause_on_start = false + +logging_factory_name = dsn::tools::screen_logger + +[tools.simulator] +random_seed = 2756568580 +use_given_random_seed = false + +[network] +; how many network threads for network library(used by asio) +io_service_worker_count = 2 + +[network.34601] +; channel = network_header_format, network_provider_name, buffer_block_size +;RPC_CHANNEL_TCP = NET_HDR_DSN, dsn::tools::asio_network_provider, 65536 + +;RPC_CHANNEL_TCP = NET_HDR_THRIFT, dsn::tools::asio_network_provider, 65536 + + +; specification for each thread pool +[threadpool.default] +worker_count = 1 + +[threadpool.THREAD_POOL_DEFAULT] +name = default +partitioned = false +max_input_queue_length = 1024 +worker_priority = THREAD_xPRIORITY_NORMAL +worker_count = 2 + +[threadpool.THREAD_POOL_REPLICATION] +name = replication +partitioned = true +; max_input_queue_length = 8192 +worker_priority = THREAD_xPRIORITY_NORMAL + +[task.default] +is_trace = true +is_profile = true +allow_inline = false +rpc_call_channel = RPC_CHANNEL_TCP +fast_execution_in_network_thread = false +rpc_call_header_format_name = dsn +rpc_timeout_milliseconds = 5000 + +[task.LPC_AIO_IMMEDIATE_CALLBACK] +is_trace = false +allow_inline = false + +[task.LPC_RPC_TIMEOUT] +is_trace = false + +[task.RPC_FD_FAILURE_DETECTOR_PING] +is_trace = false + +[task.RPC_FD_FAILURE_DETECTOR_PING_ACK] +is_trace = false + +[task.LPC_BEACON_CHECK] +is_trace = false + + +[replication.meta_servers] +localhost:34601 + +[replication.app] +app_name = name?>.instance0 +app_type = name?> +partition_count = 1 +max_replica_count = 3 + +[replication] + +prepare_timeout_ms_for_secondaries = 1000 +learn_timeout_ms = 
30000 +staleness_for_commit = 20 +staleness_for_start_prepare_for_potential_secondary = 110 +mutation_max_size_mb = 15 +mutation_max_pending_time_ms = 20 +mutation_2pc_min_replica_count = 2 + +preapre_list_max_size_mb = 250 +request_batch_disabled = false +group_check_internal_ms = 100000 +group_check_disabled = false +fd_disabled = false +fd_check_interval_seconds = 5 +fd_beacon_interval_seconds = 3 +fd_lease_seconds = 14 +fd_grace_seconds = 15 +working_dir = . +log_buffer_size_mb = 1 +log_pending_max_ms = 100 +log_file_size_mb = 32 +log_batch_write = true + +config_sync_interval_ms = 60000 diff --git a/bin/dsn.templates/replication/main.cpp.php b/bin/dsn.templates/replication/main.cpp.php new file mode 100644 index 0000000000..e98621588f --- /dev/null +++ b/bin/dsn.templates/replication/main.cpp.php @@ -0,0 +1,37 @@ + +// apps +# include ".app.example.h" +# include ".server.impl.h" + +// tools +# include +# include +# include +# include +# include + +int main(int argc, char** argv) +{ + // register replication application provider + dsn::replication::register_replica_provider<get_cpp_namespace().$_PROG->name?>_service_impl>("name?>"); + + // register all possible services + dsn::service::system::register_service<::dsn::replication::meta_service_app>("meta"); + dsn::service::system::register_service<::dsn::replication::replication_service_app>("replica"); + dsn::service::system::register_service<get_cpp_namespace().$_PROG->name?>_client_app>("client"); + + // register all possible tools and toollets + dsn::tools::register_tool("nativerun"); + dsn::tools::register_tool("simulator"); + dsn::tools::register_toollet("tracer"); + dsn::tools::register_toollet("profiler"); + dsn::tools::register_toollet("fault_injector"); + + // specify what services and tools will run in config file, then run + dsn::service::system::run("config.ini", true); + return 0; +} diff --git a/bin/dsn.templates/replication/server.h.php b/bin/dsn.templates/replication/server.h.php new file mode 
100644 index 0000000000..281d0b21b7 --- /dev/null +++ b/bin/dsn.templates/replication/server.h.php @@ -0,0 +1,69 @@ + +# pragma once +# include +# include ".code.definition.h" +# include + +get_cpp_namespace_begin()?> + +services as $svc) { ?> +class name?>_service + : public ::dsn::replication::replication_app_base +{ +public: + name?>_service(::dsn::replication::replica* replica, ::dsn::configuration_ptr& config) + : ::dsn::replication::replication_app_base(replica, config) + { + open_service(); + } + + virtual ~name?>_service() + { + close_service(); + } + +protected: + // all service handlers to be implemented further +functions as $f) { ?> + // get_rpc_code()?> +is_one_way()) {?> + virtual void on_name?>(const get_first_param()->get_cpp_type()?>& get_first_param()->name?>) + { + std::cout << "... exec get_rpc_code()?> ... (not implemented) " << std::endl; + } + + virtual void on_name?>(const get_first_param()->get_cpp_type()?>& get_first_param()->name?>, ::dsn::service::rpc_replier<get_cpp_return_type()?>>& reply) + { + std::cout << "... exec get_rpc_code()?> ... 
(not implemented) " << std::endl; + get_cpp_return_type()?> resp; + reply(resp); + } + + + +public: + void open_service() + { +functions as $f) { ?> +is_one_way()) {?> + this->register_rpc_handler(get_rpc_code()?>, "name?>", &name?>_service::on_name?>); + + this->register_async_rpc_handler(get_rpc_code()?>, "name?>", &name?>_service::on_name?>); + + + } + + void close_service() + { +functions as $f) { ?> + this->unregister_rpc_handler(get_rpc_code()?>); + + } +}; + + +get_cpp_namespace_end()?> diff --git a/bin/dsn.templates/single/CMakeLists.txt.php b/bin/dsn.templates/single/CMakeLists.txt.php new file mode 100644 index 0000000000..393d37e18c --- /dev/null +++ b/bin/dsn.templates/single/CMakeLists.txt.php @@ -0,0 +1,27 @@ + +cmake_minimum_required(VERSION 2.8.8) + +set(DSN_ROOT "") +if(NOT EXISTS "${DSN_ROOT}/") + message(FATAL_ERROR "Please make sure that ${DSN_ROOT} exists.") +endif() + +include("${DSN_ROOT}/bin/dsn.cmake") + +set(DSN_APP_TARGET "name?>") +project(${DSN_APP_TARGET} C CXX) +set(DSN_BUILD_RUNTIME 0) +set(DSN_EXTRA_BOOST_PACKAGES "") +set(DSN_EXTRA_INCLUDEDIR "") +set(DSN_EXTRA_LIBRARYDIR "") +set(DSN_EXTRA_LIBS "") +set(DSN_EXTRA_SRC "") +dsn_common_setup() +file(GLOB BINPLACE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/*.ini") +dsn_add_executable(${DSN_APP_TARGET} "${BINPLACE_FILES}") diff --git a/bin/dsn.templates/single/app.example.h.php b/bin/dsn.templates/single/app.example.h.php new file mode 100644 index 0000000000..b1ad2fc6b0 --- /dev/null +++ b/bin/dsn.templates/single/app.example.h.php @@ -0,0 +1,117 @@ + +# pragma once +# include ".client.h" +# include ".server.h" + +get_cpp_namespace_begin()?> + +// server app example +class name?>_server_app : public ::dsn::service::service_app +{ +public: + name?>_server_app(::dsn::service_app_spec* s) + : ::dsn::service::service_app(s) {} + + virtual ::dsn::error_code start(int argc, char** argv) + { +services as $svc) { ?> + _name?>_svc.open_service(); + + return ::dsn::ERR_SUCCESS; + } + + virtual void 
stop(bool cleanup = false) + { +services as $svc) { ?> + _name?>_svc.close_service(); + + } + +private: +services as $svc) { ?> + name?>_service _name?>_svc; + +}; + +// client app example +class name?>_client_app : public ::dsn::service::service_app, public virtual ::dsn::service::servicelet +{ +public: + name?>_client_app(::dsn::service_app_spec* s) + : ::dsn::service::service_app(s) + { +services as $svc) { ?> + _name?>_client = nullptr; + + } + + ~name?>_client_app() + { + stop(); + } + + virtual ::dsn::error_code start(int argc, char** argv) + { + if (argc < 3) + return ::dsn::ERR_INVALID_PARAMETERS; + + _server = ::dsn::end_point(argv[1], (uint16_t)atoi(argv[2])); +services as $svc) { ?> + _name?>_client = new name?>_client(_server); + + _timer = ::dsn::service::tasking::enqueue(get_test_task_code()?>, this, &name?>_client_app::on_test_timer, 0, 0, 1000); + return ::dsn::ERR_SUCCESS; + } + + virtual void stop(bool cleanup = false) + { + _timer->cancel(true); +services as $svc) { ?> + if (_name?>_client != nullptr) + { + delete _name?>_client; + _name?>_client = nullptr; + } + + } + + void on_test_timer() + { +services as $svc) +{ + echo "\t\t// test for service '". $svc->name ."'". 
PHP_EOL; + foreach ($svc->functions as $f) +{?> + { + get_first_param()->get_cpp_type()?> req; +is_one_way()) { ?> + _name?>_client->name?>(req); + + //sync: + get_cpp_return_type()?> resp; + auto err = _name?>_client->name?>(req, resp); + std::cout << "call get_rpc_code()?> end, return " << err.to_string() << std::endl; + //async: + //_name?>_client->begin_name?>(req); + + } + + } + +private: + ::dsn::task_ptr _timer; + ::dsn::end_point _server; + +services as $svc) { ?> + name?>_client *_name?>_client; + +}; + +get_cpp_namespace_end()?> diff --git a/bin/dsn.templates/single/client.h.php b/bin/dsn.templates/single/client.h.php new file mode 100644 index 0000000000..87252060f7 --- /dev/null +++ b/bin/dsn.templates/single/client.h.php @@ -0,0 +1,130 @@ + +# pragma once +# include +# include ".code.definition.h" +# include + + +get_cpp_namespace_begin()?> + +services as $svc) { ?> +class name?>_client + : public virtual ::dsn::service::servicelet +{ +public: + name?>_client(const ::dsn::end_point& server) { _server = server; } + name?>_client() { _server = ::dsn::end_point::INVALID; } + virtual ~name?>_client() {} + +functions as $f) { ?> + + // ---------- call get_rpc_code()?> ------------ +is_one_way()) {?> + void name?>( + const get_first_param()->get_cpp_type()?>& get_first_param()->name?>, + int hash = 0, + const ::dsn::end_point *p_server_addr = nullptr) + { + ::dsn::message_ptr msg = ::dsn::message::create_request(get_rpc_code()?>, 0, hash); + marshall(msg->writer(), get_first_param()->name?>); + ::dsn::service::rpc::call_one_way(p_server_addr ? 
*p_server_addr : _server, msg); + } + + // - synchronous + ::dsn::error_code name?>( + const get_first_param()->get_cpp_type()?>& get_first_param()->name?>, + __out_param get_cpp_return_type()?>& resp, + int timeout_milliseconds = 0, + int hash = 0, + const ::dsn::end_point *p_server_addr = nullptr) + { + ::dsn::message_ptr msg = ::dsn::message::create_request(get_rpc_code()?>, timeout_milliseconds, hash); + marshall(msg->writer(), get_first_param()->name?>); + auto resp_task = ::dsn::service::rpc::call(p_server_addr ? *p_server_addr : _server, msg, nullptr); + resp_task->wait(); + if (resp_task->error() == ::dsn::ERR_SUCCESS) + { + unmarshall(resp_task->get_response()->reader(), resp); + } + return resp_task->error(); + } + + // - asynchronous with on-stack get_first_param()->get_cpp_type()?> and get_cpp_return_type()?> + ::dsn::rpc_response_task_ptr begin_name?>( + const get_first_param()->get_cpp_type()?>& get_first_param()->name?>, + void* context = nullptr, + int timeout_milliseconds = 0, + int reply_hash = 0, + int request_hash = 0, + const ::dsn::end_point *p_server_addr = nullptr) + { + return ::dsn::service::rpc::call_typed( + p_server_addr ? 
*p_server_addr : _server, + get_rpc_code()?>, + get_first_param()->name?>, + this, + &name?>_client::end_name?>, + context, + request_hash, + timeout_milliseconds, + reply_hash + ); + } + + virtual void end_name?>( + ::dsn::error_code err, + const get_cpp_return_type()?>& resp, + void* context) + { + if (err != ::dsn::ERR_SUCCESS) std::cout << "reply get_rpc_code()?> err : " << err.to_string() << std::endl; + else + { + std::cout << "reply get_rpc_code()?> ok" << std::endl; + } + } + + // - asynchronous with on-heap std::shared_ptr<get_first_param()->get_cpp_type()?>> and std::shared_ptr<get_cpp_return_type()?>> + ::dsn::rpc_response_task_ptr begin_name?>2( + std::shared_ptr<get_first_param()->get_cpp_type()?>>& get_first_param()->name?>, + int timeout_milliseconds = 0, + int reply_hash = 0, + int request_hash = 0, + const ::dsn::end_point *p_server_addr = nullptr) + { + return ::dsn::service::rpc::call_typed( + p_server_addr ? *p_server_addr : _server, + get_rpc_code()?>, + get_first_param()->name?>, + this, + &name?>_client::end_name?>2, + request_hash, + timeout_milliseconds, + reply_hash + ); + } + + virtual void end_name?>2( + ::dsn::error_code err, + std::shared_ptr<get_first_param()->get_cpp_type()?>>& get_first_param()->name?>, + std::shared_ptr<get_cpp_return_type()?>>& resp) + { + if (err != ::dsn::ERR_SUCCESS) std::cout << "reply get_rpc_code()?> err : " << err.to_string() << std::endl; + else + { + std::cout << "reply get_rpc_code()?> ok" << std::endl; + } + } + + + + +private: + ::dsn::end_point _server; +}; + + +get_cpp_namespace_end()?> diff --git a/bin/dsn.templates/single/config.ini.php b/bin/dsn.templates/single/config.ini.php new file mode 100644 index 0000000000..58fedfa11c --- /dev/null +++ b/bin/dsn.templates/single/config.ini.php @@ -0,0 +1,71 @@ + +[apps.name?>.server] +name = name?>.server +type = name?>_server +arguments = +ports = 27001 +run = true + +[apps.client] +name = client +type = name?>_client +arguments = localhost 27001 +count = 
1 +run = true + +[core] + +tool = simulator +;tool = nativerun +;toollets = tracer +;toollets = tracer, profiler, fault_injector +pause_on_start = false + +logging_factory_name = dsn::tools::screen_logger + +[tools.simulator] +random_seed = 2756568580 +use_given_random_seed = false + +[network] +; how many network threads for network library(used by asio) +io_service_worker_count = 2 + +[network.27001] +; channel = network_header_format, network_provider_name, buffer_block_size +;RPC_CHANNEL_TCP = NET_HDR_DSN, dsn::tools::asio_network_provider, 65536 + +;RPC_CHANNEL_TCP = NET_HDR_THRIFT, dsn::tools::asio_network_provider, 65536 + + +; specification for each thread pool +[threadpool.default] + +[threadpool.THREAD_POOL_DEFAULT] +name = default +partitioned = false +worker_count = 1 +max_input_queue_length = 1024 +worker_priority = THREAD_xPRIORITY_NORMAL + +[task.default] +is_trace = true +is_profile = true +allow_inline = false +rpc_call_channel = RPC_CHANNEL_TCP +fast_execution_in_network_thread = false +rpc_call_header_format_name = dsn +rpc_timeout_milliseconds = 5000 + +[task.LPC_AIO_IMMEDIATE_CALLBACK] +is_trace = false +is_profile = false +allow_inline = false + +[task.LPC_RPC_TIMEOUT] +is_trace = false +is_profile = false diff --git a/bin/dsn.templates/single/main.cpp.php b/bin/dsn.templates/single/main.cpp.php new file mode 100644 index 0000000000..83ea48b47c --- /dev/null +++ b/bin/dsn.templates/single/main.cpp.php @@ -0,0 +1,40 @@ + +// apps +# include ".app.example.h" + +// tools +# include +# include +# include +# include +# include + +int main(int argc, char** argv) +{ + // register all possible service apps + dsn::service::system::register_service<get_cpp_namespace().$_PROG->name?>_server_app>("name?>_server"); + dsn::service::system::register_service<get_cpp_namespace().$_PROG->name?>_client_app>("name?>_client"); + + // register all possible tools and toollets + dsn::tools::register_tool("nativerun"); + dsn::tools::register_tool("simulator"); + 
dsn::tools::register_toollet("tracer"); + dsn::tools::register_toollet("profiler"); + dsn::tools::register_toollet("fault_injector"); + + // register necessary components +#ifdef DSN_NOT_USE_DEFAULT_SERIALIZATION + + dsn::tools::register_component_provider("thrift"); + +#endif + + // specify what services and tools will run in config file, then run + dsn::service::system::run("config.ini", true); + return 0; +} diff --git a/bin/dsn.templates/single/server.h.php b/bin/dsn.templates/single/server.h.php new file mode 100644 index 0000000000..27a767397e --- /dev/null +++ b/bin/dsn.templates/single/server.h.php @@ -0,0 +1,61 @@ + +# pragma once +# include +# include ".code.definition.h" +# include + +get_cpp_namespace_begin()?> + +services as $svc) { ?> +class name?>_service + : public ::dsn::service::serverlet<name?>_service> +{ +public: + name?>_service() : ::dsn::service::serverlet<name?>_service>("name?>") {} + virtual ~name?>_service() {} + +protected: + // all service handlers to be implemented further +functions as $f) { ?> + // get_rpc_code()?> +is_one_way()) {?> + virtual void on_name?>(const get_first_param()->get_cpp_type()?>& get_first_param()->name?>) + { + std::cout << "... exec get_rpc_code()?> ... (not implemented) " << std::endl; + } + + virtual void on_name?>(const get_first_param()->get_cpp_type()?>& get_first_param()->name?>, ::dsn::service::rpc_replier<get_cpp_return_type()?>>& reply) + { + std::cout << "... exec get_rpc_code()?> ... 
(not implemented) " << std::endl; + get_cpp_return_type()?> resp; + reply(resp); + } + + + +public: + void open_service() + { +functions as $f) { ?> +is_one_way()) {?> + this->register_rpc_handler(get_rpc_code()?>, "name?>", &name?>_service::on_name?>); + + this->register_async_rpc_handler(get_rpc_code()?>, "name?>", &name?>_service::on_name?>); + + + } + + void close_service() + { +functions as $f) { ?> + this->unregister_rpc_handler(get_rpc_code()?>); + + } +}; + + +get_cpp_namespace_end()?> diff --git a/bin/dsn.templates/type.php b/bin/dsn.templates/type.php new file mode 100644 index 0000000000..65871c306e --- /dev/null +++ b/bin/dsn.templates/type.php @@ -0,0 +1,532 @@ + strlen($haystack)) return FALSE; + else return substr($haystack, 0, strlen($needle)) === $needle; + } + + public static function is_container_type($full_name) + { + return thelpers::begin_with($full_name, "vector<") + || thelpers::begin_with($full_name, "list<") + || thelpers::begin_with($full_name, "map<") + || thelpers::begin_with($full_name, "set<") + ; + } + + public static function get_container_type($full_name) + { + if (thelpers::is_container_type($full_name)) + return trim(substr($full_name, 0, strpos($full_name, "<", 0))); + else + return FALSE; + } + + public static function get_container_key_type($full_name) + { + if (thelpers::is_container_type($full_name)) + { + $pos = strpos($full_name, "<"); + $kvs = trim(substr($full_name, $pos + 1, strrpos($full_name, ">") - $pos - 1)); + if (strpos($kvs, ",") == FALSE) + return $kvs; + else + return trim(substr($kvs, 0, strpos($kvs, ",") - 1)); + } + else + return FALSE; + } + + public static function get_container_value_type($full_name) + { + if (thelpers::is_container_type($full_name)) + { + $pos = strpos($full_name, "<"); + $kvs = trim(substr($full_name, $pos + 1, strrpos($full_name, ">") - $pos - 1)); + if (strpos($kvs, ",") == FALSE) + return FALSE; + else + return trim(substr($kvs, strpos($kvs, ",") + 1)); + } + else + return FALSE; + } 
+ + public static function base_type_to_cpp_type($base_type) + { + //echo "base_type_to_cpp_type'".$base_type."'".PHP_EOL; + switch ($base_type) + { + case "list": return "std::list"; + case "map": return "std::map"; + case "set": return "std::set"; + case "vector": return "std::vector"; + case "string": return "std::string"; + case "double": return "double"; + case "float": return "float"; + case "i64": return "int64_t"; + case "int64": return "int64_t"; + case "int64_t": return "int64_t"; + case "ui64": return "uint64_t"; + case "uint64": return "uint64_t"; + case "uint64_t": return "uint64_t"; + case "i32": return "int32_t"; + case "int32": return "int32_t"; + case "int32_t": return "int32_t"; + case "ui32": return "uint32_t"; + case "uint32": return "uint32_t"; + case "uint32_t": return "uint32_t"; + case "byte": return "byte"; + case "BYTE": return "byte"; + case "Byte": return "byte"; + case "bool": return "bool"; + case "BOOL": return "bool"; + case "Bool": return "bool"; + case "sint32": return "int32_t"; + case "sint64": return "int64_t"; + case "fixed32": return "int32_t"; + case "fixed64": return "int64_t"; + case "sfixed32": return "int32_t"; + case "sfixed64": return "int64_t"; + + default: return $base_type; + } + } + + public static function get_cpp_type_name($full_name) + { + global $_PROG; + if (thelpers::is_container_type($full_name)) + return thelpers::get_cpp_name_internal($full_name); + else + { + $pos = strrpos($full_name, "."); + if (FALSE == $pos) + return thelpers::get_cpp_name_internal($full_name); + else + { + // check cpp namespace as prefix + $prog = NULL; + $left = ""; + if (thelpers::begin_with($full_name, $_PROG->get_namespace("cpp").".")) + { + $left = substr($full_name, strlen($_PROG->get_namespace("cpp")) + 1); + $prog = $_PROG; + } + else + { + foreach ($_PROG->includes as $pn => $p) + { + if (thelpers::begin_with($full_name, $p->get_namespace("cpp").".")) + { + $left = substr($full_name, strlen($p->get_namespace("cpp")) + 1); + 
$prog = $p; + break; + } + } + } + + // check package as prefix + if ($prog == NULL) + { + if (thelpers::begin_with($full_name, $_PROG->name.".")) + { + $left = substr($full_name, strlen($_PROG->name) + 1); + $prog = $_PROG; + } + else + { + foreach ($_PROG->includes as $pn => $p) + { + if (thelpers::begin_with($full_name, $p->name.".")) + { + $left = substr($full_name, strlen($p->name) + 1); + $prog = $p; + break; + } + } + } + } + + if (NULL == $prog) + { + return "full type translation from '". $full_name. "' failed."; + } + + return $prog == $_PROG ? + thelpers::get_cpp_name_internal($left) : + $prog->get_cpp_namespace() . thelpers::get_cpp_name_internal($left); + } + } + } + + private static function get_cpp_name_internal($full_name) + { + if (thelpers::is_container_type($full_name)) + { + $kt = thelpers::get_container_key_type($full_name); + $vt = thelpers::get_container_value_type($full_name); + $ct = thelpers::get_container_type($full_name); + return thelpers::base_type_to_cpp_type($ct)."< ". + ($vt == FALSE ? 
thelpers::get_cpp_type_name($kt) + : (thelpers::get_cpp_type_name($kt).", ".thelpers::get_cpp_type_name($vt))) + .">"; + } + else if (FALSE != strpos($full_name, ".")) + { + return str_replace(".", "_", $full_name); + //return substr($full_name, 0, strpos($full_name, ".")) + // ."::".thelpers::get_cpp_name_internal( + // substr($full_name, strpos($full_name, ".") + 1) + // ); + } + else + { + return thelpers::base_type_to_cpp_type($full_name); + } + } +} + + +class t_program +{ + var $name; + var $namespaces; + var $includes; + var $typedefs; + var $enums; + var $structs; + var $services; + var $types; + var $annotations; + + function __construct($name) + { + $this->name = $name; + $this->namespaces = array(); + $this->includes = array(); + $this->typedefs = array(); + $this->enums = array(); + $this->structs = array(); + $this->services = array(); + $this->types = array(); + $this->annotations = array(); + } + + function get_test_task_code() + { + return "LPC" + ."_". strtoupper($this->name) + ."_TEST_TIMER" + ; + } + + function get_cpp_namespace() + { + if (!array_key_exists("cpp", $this->namespaces)) + { + return ""; + } + + $nms = $this->namespaces["cpp"]; + $nms = explode(".", $nms); + $rt = "::"; + foreach ($nms as $nm) + { + $rt .= $nm ."::"; + } + return $rt; + } + + function get_namespace($lang) + { + if (!array_key_exists($lang, $this->namespaces)) + { + return FALSE; + } + + return $this->namespaces[$lang]; + } + + function get_cpp_namespace_begin() + { + if (!array_key_exists("cpp", $this->namespaces)) + { + return ""; + } + + $nms = $this->namespaces["cpp"]; + $nms = explode(".", $nms); + $rt = ""; + foreach ($nms as $nm) + { + $rt .= "namespace ". $nm ." 
{ "; + } + return $rt; + } + + function get_cpp_namespace_end() + { + if (!array_key_exists("cpp", $this->namespaces)) + { + return ""; + } + + $nms = $this->namespaces["cpp"]; + $nms = explode(".", $nms); + $rt = ""; + foreach ($nms as $nm) + { + $rt .= "} "; + } + return $rt; + } + + function add_annotations($atts) + { + $this->annotations = $atts; + + foreach ($this->structs as $s) + { + $s->on_annotations(); + } + + foreach ($this->services as $s) + { + $s->on_annotations(); + } + } +} + +class t_type +{ + var $program; + var $name; + + function __construct($program, $name) + { + if (thelpers::begin_with($name, $program->get_namespace("cpp").".")) + { + $name = substr($name, strlen($program->get_namespace("cpp")) + 1); + } + else if (thelpers::begin_with($name, $program->name.".")) + { + $name = substr($name, strlen($program->name) + 1); + } + + $this->program = $program; + $this->name = $name; + $program->types[] = $this; + } + + function get_cpp_name() + { + $pos = strpos($this->name, "."); + if ($pos == FALSE) + return $this->name; + else + { + $prefix = substr($this->name, 0, $pos); + if (0 == strcmp($prefix, $this->program->name) + || 0 == strcmp($prefix, $this->program->get_namespace("cpp"))) + { + $prefix = substr($this->name, $pos + 1); + } + else + { + $prefix = $this->name; + } + + return str_replace(".", "_", $prefix); + } + } + + function is_void() { return false; } + function is_enum() { return false; } + function is_alias() { return false; } + function is_base_type() { return true; } +} + +class t_typedef extends t_type +{ + var $type; + + function __construct($program, $type, $alias) + { + parent::__construct($program, $alias); + $this->type = $type; + $program->typedefs[] = $this; + } + + function is_alias() { return true; } +} + +class t_enum extends t_type +{ + var $values; + + function __construct($program, $name) + { + parent::__construct($program, $name); + $this->values = array(); + $program->enums[] = $this; + } + + function 
add_value($name, $value) + { + $this->values[$name] = $value; + } + + function is_enum() { return true; } +} + +class t_field +{ + var $name; + var $type_name; + + function __construct($name, $type_name) + { + $this->name = $name; + $this->type_name = $type_name; + } + + function get_cpp_type() + { + return thelpers::get_cpp_type_name($this->type_name); + } +} + +class t_struct extends t_type +{ + var $fields; + + function __construct($program, $name) + { + parent::__construct($program, $name); + $this->fields = array(); + $program->structs[] = $this; + } + + function add_field($name, $type_name) + { + $this->fields[] = new t_field($name, $type_name); + } + + function is_base_type() { return false; } + + function on_annotations() + { + // nothing to do for now + } +} + +class t_function +{ + var $service; + var $ret; + var $name; + var $params; + var $is_write; + + function __construct($service, $ret, $name) + { + $this->service = $service; + $this->ret = $ret; + $this->name = $name; + $this->is_write = false; + $this->params = array(); + } + + function add_param($name, $type_name) + { + $this->params[] = new t_field($name, $type_name); + } + + function get_cpp_return_type() + { + return thelpers::get_cpp_type_name($this->ret); + } + + function get_first_param() + { + return $this->params[0]; + } + + function get_rpc_code() + { + return "RPC" + ."_". strtoupper($this->service->program->name) + ."_". strtoupper($this->service->name) + ."_". 
strtoupper($this->name) + ; + } + + function is_one_way() + { + return $this->ret == "void" || $this->ret == "VOID"; + } + + function on_annotations() + { + $atts = $this->service->program->annotations; + + // [function.service.add] + // write = true ; service.add is a write function + $key = "function.".$this->service->name.".".$this->name; + if ($atts[$key] != NULL) + { + $b = $atts[$key]["write"]; + $this->is_write = ($b != NULL && ($b == "1" || $b == 1)); + } + } +} + +class t_service extends t_type +{ + var $functions; + var $is_stateful; + + function __construct($program, $name) + { + parent::__construct($program, $name); + $this->functions = array(); + $this->is_stateful = false; + + $program->services[] = $this; + } + + function add_function($ret, $name) + { + $f = new t_function($this, $ret, $name); + $this->functions[] = $f; + return $f; + } + + function on_annotations() + { + $atts = $this->program->annotations; + + // [service.counter] + // stateful = true ; counter is a stateful service + $key = "service.".$this->name; + if ($atts[$key] != NULL) + { + $b = $atts[$key]["stateful"]; + $this->is_stateful = ($b != NULL && ($b == "1" || $b == 1)); + } + + // continue for each function + foreach ($this->functions as $f) + { + $f->on_annotations(); + } + } +} +?> diff --git a/bin/dsn.templates/types.h.php b/bin/dsn.templates/types.h.php new file mode 100644 index 0000000000..67132748a8 --- /dev/null +++ b/bin/dsn.templates/types.h.php @@ -0,0 +1,109 @@ + +# pragma once + +// +// uncomment the following line if you want to use +// data encoding/decoding from the original tool instead of rDSN +// in this case, you need to use these tools to generate +// type files with --gen=cpp etc. options +// +// !!! WARNING: not feasible for replicated service yet!!! 
+// +// # define DSN_NOT_USE_DEFAULT_SERIALIZATION + +# include + +# ifdef DSN_NOT_USE_DEFAULT_SERIALIZATION + + +# include +# include "name?>_types.h" + +get_cpp_namespace_begin().PHP_EOL; + +foreach ($_PROG->structs as $s) +{ + echo "\t// ---------- ". $s->name . " -------------". PHP_EOL; + echo "\tinline void marshall(::dsn::binary_writer& writer, const ". $s->get_cpp_name() . "& val)".PHP_EOL; + echo "\t{".PHP_EOL; + echo "\t\tboost::shared_ptr<::dsn::binary_writer_transport> transport(new ::dsn::binary_writer_transport(writer));".PHP_EOL; + echo "\t\t::apache::thrift::protocol::TBinaryProtocol proto(transport);".PHP_EOL; + echo "\t\t::dsn::marshall_rpc_args<".$s->get_cpp_name().">(&proto, val, &".$s->get_cpp_name()."::write);".PHP_EOL; + echo "\t};".PHP_EOL; + echo PHP_EOL; + echo "\tinline void unmarshall(::dsn::binary_reader& reader, __out_param ". $s->get_cpp_name() . "& val)".PHP_EOL; + echo "\t{".PHP_EOL; + echo "\t\tboost::shared_ptr<::dsn::binary_reader_transport> transport(new ::dsn::binary_reader_transport(reader));".PHP_EOL; + echo "\t\t::apache::thrift::protocol::TBinaryProtocol proto(transport);".PHP_EOL; + echo "\t\t::dsn::unmarshall_rpc_args<".$s->get_cpp_name().">(&proto, val, &".$s->get_cpp_name()."::read);".PHP_EOL; + echo "\t};".PHP_EOL; + echo PHP_EOL; +} + +echo $_PROG->get_cpp_namespace_end().PHP_EOL; +?> + + + +# include "name?>.pb.h" +# include + + +# error not supported idl type + + +# else // use rDSN's data encoding/decoding + +get_cpp_namespace_begin().PHP_EOL; + +foreach ($_PROG->enums as $em) +{ + echo "\t// ---------- ". $em->name . " -------------". PHP_EOL; + echo "\tenum ". $em->get_cpp_name() .PHP_EOL; + echo "\t{".PHP_EOL; + foreach ($em->values as $k => $v) { + echo "\t\t". $k . " = " .$v ."," .PHP_EOL; + } + echo "\t};".PHP_EOL; + echo PHP_EOL; + echo "\tDEFINE_POD_SERIALIZATION(". $em->get_cpp_name() .");".PHP_EOL; + echo PHP_EOL; +} + +foreach ($_PROG->structs as $s) +{ + echo "\t// ---------- ". $s->name . 
" -------------". PHP_EOL; + echo "\tstruct ". $s->get_cpp_name() .PHP_EOL; + echo "\t{".PHP_EOL; + foreach ($s->fields as $fld) { + echo "\t\t". $fld->get_cpp_type() . " " .$fld->name .";" .PHP_EOL; + } + echo "\t};".PHP_EOL; + echo PHP_EOL; + echo "\tinline void marshall(::dsn::binary_writer& writer, const ". $s->get_cpp_name() . "& val)".PHP_EOL; + echo "\t{".PHP_EOL; + foreach ($s->fields as $fld) { + echo "\t\tmarshall(writer, val." .$fld->name .");" .PHP_EOL; + } + echo "\t};".PHP_EOL; + echo PHP_EOL; + echo "\tinline void unmarshall(::dsn::binary_reader& reader, __out_param ". $s->get_cpp_name() . "& val)".PHP_EOL; + echo "\t{".PHP_EOL; + foreach ($s->fields as $fld) { + echo "\t\tunmarshall(reader, val." .$fld->name .");" .PHP_EOL; + } + echo "\t};".PHP_EOL; + echo PHP_EOL; +} + +echo $_PROG->get_cpp_namespace_end().PHP_EOL; +?> + +#endif diff --git a/docs/Architecture.docx b/docs/Architecture.docx new file mode 100644 index 0000000000..dec04e9f46 Binary files /dev/null and b/docs/Architecture.docx differ diff --git a/docs/Microsoft Makes rDSN Open Source_v5.docx b/docs/Microsoft Makes rDSN Open Source_v5.docx new file mode 100644 index 0000000000..23a48e2a2b Binary files /dev/null and b/docs/Microsoft Makes rDSN Open Source_v5.docx differ diff --git a/docs/Microsoft Makes rDSN Open Source_v6.docx b/docs/Microsoft Makes rDSN Open Source_v6.docx new file mode 100644 index 0000000000..565a52a313 Binary files /dev/null and b/docs/Microsoft Makes rDSN Open Source_v6.docx differ diff --git "a/docs/\345\276\256\350\275\257\345\274\200\346\272\220rDSN\345\210\206\345\270\203\345\274\217\347\263\273\347\273\237\345\274\200\345\217\221\346\241\206\346\236\266_v5.docx" "b/docs/\345\276\256\350\275\257\345\274\200\346\272\220rDSN\345\210\206\345\270\203\345\274\217\347\263\273\347\273\237\345\274\200\345\217\221\346\241\206\346\236\266_v5.docx" new file mode 100644 index 0000000000..08c754bc40 Binary files /dev/null and 
"b/docs/\345\276\256\350\275\257\345\274\200\346\272\220rDSN\345\210\206\345\270\203\345\274\217\347\263\273\347\273\237\345\274\200\345\217\221\346\241\206\346\236\266_v5.docx" differ diff --git "a/docs/\345\276\256\350\275\257\345\274\200\346\272\220rDSN\345\210\206\345\270\203\345\274\217\347\263\273\347\273\237\345\274\200\345\217\221\346\241\206\346\236\266_v6.docx" "b/docs/\345\276\256\350\275\257\345\274\200\346\272\220rDSN\345\210\206\345\270\203\345\274\217\347\263\273\347\273\237\345\274\200\345\217\221\346\241\206\346\236\266_v6.docx" new file mode 100644 index 0000000000..dd6ff70438 Binary files /dev/null and "b/docs/\345\276\256\350\275\257\345\274\200\346\272\220rDSN\345\210\206\345\270\203\345\274\217\347\263\273\347\273\237\345\274\200\345\217\221\346\241\206\346\236\266_v6.docx" differ diff --git a/include/dsn/dist/failure_detector.h b/include/dsn/dist/failure_detector.h new file mode 100644 index 0000000000..983b1a3694 --- /dev/null +++ b/include/dsn/dist/failure_detector.h @@ -0,0 +1,28 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#pragma once + +# include diff --git a/include/dsn/dist/failure_detector/failure_detector.h b/include/dsn/dist/failure_detector/failure_detector.h new file mode 100644 index 0000000000..9f2b712ef9 --- /dev/null +++ b/include/dsn/dist/failure_detector/failure_detector.h @@ -0,0 +1,171 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +# pragma once + +# include +# include + +namespace dsn { namespace fd { + +DEFINE_THREAD_POOL_CODE(THREAD_POOL_FD) +DEFINE_TASK_CODE(LPC_BEACON_CHECK, TASK_PRIORITY_HIGH, THREAD_POOL_FD) + +class failure_detector_callback +{ +public: + // client side + virtual void on_master_disconnected( const std::vector& nodes ) = 0; + virtual void on_master_connected( const end_point& node) = 0; + + // server side + virtual void on_worker_disconnected( const std::vector& nodes ) = 0; + virtual void on_worker_connected( const end_point& node ) = 0; +}; + +class failure_detector : + public failure_detector_service, + public failure_detector_client, + public failure_detector_callback +{ +public: + failure_detector(); + + virtual void on_ping(const beacon_msg& beacon, ::dsn::service::rpc_replier& reply); + + virtual void end_ping(::dsn::error_code err, const beacon_ack& ack, void* context); + +public: + int start( + uint32_t check_interval_seconds, + uint32_t beacon_interval_seconds, + uint32_t lease_seconds, + uint32_t grace_seconds, + bool use_allow_list = false + ); + + int stop(); + + void register_master(const end_point& target); + + bool switch_master(const end_point& from, const end_point& to); + + bool unregister_master( const end_point& node); + + bool is_master_connected( const end_point& node) const; + + // ATTENTION: be very careful to set is_connected to false as + // workers are always considered *connected* initially which is ok even when workers think master is disconnected + // Considering workers *disconnected* initially is *dangerous* coz it may violate the invariance when workers think they are online + void register_worker( const end_point& node, bool is_connected = true); + + bool unregister_worker( const end_point& node); + + void clear_workers(); + + bool is_worker_connected( const end_point& node) const; + + void add_allow_list( const end_point& node); + + bool remove_from_allow_list( const end_point& node); + + int worker_count() const { return 
static_cast(_workers.size()); } + + int master_count() const { return static_cast(_masters.size()); } + +protected: + void on_ping_internal(const beacon_msg& beacon, __out_param beacon_ack& ack); + + bool is_time_greater_than(uint64_t ts, uint64_t base); + + void report(const end_point& node, bool is_master, bool is_connected); + +private: + void process_all_records(); + +private: + class master_record + { + public: + end_point node; + uint64_t last_send_time_for_beacon_with_ack; + uint64_t next_beacon_time; + bool is_alive; + bool rejected; + + // masters are always considered *disconnected* initially which is ok even when master thinks workers are connected + master_record(const end_point& n, uint64_t last_send_time_for_beacon_with_ack_, uint64_t next_beacon_time_) + { + node = n; + last_send_time_for_beacon_with_ack = last_send_time_for_beacon_with_ack_; + next_beacon_time = next_beacon_time_; + is_alive = false; + rejected = false; + } + }; + + class worker_record + { + public: + end_point node; + uint64_t last_beacon_recv_time; + bool is_alive; + + // workers are always considered *connected* initially which is ok even when workers think master is disconnected + worker_record(const end_point& node, uint64_t last_beacon_recv_time) + { + this->node = node; + this->last_beacon_recv_time = last_beacon_recv_time; + is_alive = true; + } + }; + +private: + typedef std::map master_map; + typedef std::map worker_map; + + // allow list are set on machine name (port can vary) + typedef std::set allow_list; + + mutable service::zlock _lock; + master_map _masters; + worker_map _workers; + + uint32_t _beacon_interval_milliseconds; + uint32_t _check_interval_milliseconds; + uint32_t _lease_milliseconds; + uint32_t _grace_milliseconds; + bool _is_started; + task_ptr _current_task; + + bool _use_allow_list; + allow_list _allow_list; + +protected: + // subClass can rewrite these method. 
+ virtual void send_beacon(const end_point& node, uint64_t time); +}; + +}} // end namespace diff --git a/include/dsn/dist/failure_detector/fd.client.h b/include/dsn/dist/failure_detector/fd.client.h new file mode 100644 index 0000000000..a0eaccdd9b --- /dev/null +++ b/include/dsn/dist/failure_detector/fd.client.h @@ -0,0 +1,133 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +# pragma once +# include +# include +# include + + +namespace dsn { namespace fd { +class failure_detector_client + : public virtual ::dsn::service::servicelet +{ +public: + failure_detector_client(const ::dsn::end_point& server) { _server = server; } + failure_detector_client() { _server = ::dsn::end_point::INVALID; } + virtual ~failure_detector_client() {} + + + // ---------- call RPC_FD_FAILURE_DETECTOR_PING ------------ + // - synchronous + ::dsn::error_code ping( + const ::dsn::fd::beacon_msg& beacon, + __out_param ::dsn::fd::beacon_ack& resp, + int timeout_milliseconds = 0, + int hash = 0, + const ::dsn::end_point *p_server_addr = nullptr) + { + ::dsn::message_ptr msg = ::dsn::message::create_request(RPC_FD_FAILURE_DETECTOR_PING, timeout_milliseconds, hash); + marshall(msg->writer(), beacon); + auto resp_task = ::dsn::service::rpc::call(p_server_addr ? *p_server_addr : _server, msg, nullptr); + resp_task->wait(); + if (resp_task->error() == ::dsn::ERR_SUCCESS) + { + unmarshall(resp_task->get_response()->reader(), resp); + } + return resp_task->error(); + } + + // - asynchronous with on-stack ::dsn::fd::beacon_msg and ::dsn::fd::beacon_ack + ::dsn::rpc_response_task_ptr begin_ping( + const ::dsn::fd::beacon_msg& beacon, + void* context, + int timeout_milliseconds = 0, + int reply_hash = 0, + int request_hash = 0, + const ::dsn::end_point *p_server_addr = nullptr) + { + return ::dsn::service::rpc::call_typed( + p_server_addr ? 
*p_server_addr : _server, + RPC_FD_FAILURE_DETECTOR_PING, + beacon, + this, + &failure_detector_client::end_ping, + context, + request_hash, + timeout_milliseconds, + reply_hash + ); + } + + virtual void end_ping( + ::dsn::error_code err, + const ::dsn::fd::beacon_ack& resp, + void* context) + { + if (err != ::dsn::ERR_SUCCESS) std::cout << "reply RPC_FD_FAILURE_DETECTOR_PING err : " << err.to_string() << std::endl; + else + { + std::cout << "reply RPC_FD_FAILURE_DETECTOR_PING ok" << std::endl; + } + } + + // - asynchronous with on-heap std::shared_ptr<::dsn::fd::beacon_msg> and std::shared_ptr<::dsn::fd::beacon_ack> + ::dsn::rpc_response_task_ptr begin_ping2( + std::shared_ptr<::dsn::fd::beacon_msg>& beacon, + int timeout_milliseconds = 0, + int reply_hash = 0, + int request_hash = 0, + const ::dsn::end_point *p_server_addr = nullptr) + { + return ::dsn::service::rpc::call_typed( + p_server_addr ? *p_server_addr : _server, + RPC_FD_FAILURE_DETECTOR_PING, + beacon, + this, + &failure_detector_client::end_ping2, + request_hash, + timeout_milliseconds, + reply_hash + ); + } + + virtual void end_ping2( + ::dsn::error_code err, + std::shared_ptr<::dsn::fd::beacon_msg>& beacon, + std::shared_ptr<::dsn::fd::beacon_ack>& resp) + { + if (err != ::dsn::ERR_SUCCESS) std::cout << "reply RPC_FD_FAILURE_DETECTOR_PING err : " << err.to_string() << std::endl; + else + { + std::cout << "reply RPC_FD_FAILURE_DETECTOR_PING ok" << std::endl; + } + } + + +private: + ::dsn::end_point _server; +}; + +} } \ No newline at end of file diff --git a/include/dsn/dist/failure_detector/fd.code.definition.h b/include/dsn/dist/failure_detector/fd.code.definition.h new file mode 100644 index 0000000000..bfc749e570 --- /dev/null +++ b/include/dsn/dist/failure_detector/fd.code.definition.h @@ -0,0 +1,35 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any 
person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +# pragma once +# include +# include + +namespace dsn { namespace fd { + // define RPC task code for service 'failure_detector' + DEFINE_TASK_CODE_RPC(RPC_FD_FAILURE_DETECTOR_PING, ::dsn::TASK_PRIORITY_COMMON, THREAD_POOL_DEFAULT) + // test timer task code + DEFINE_TASK_CODE(LPC_FD_TEST_TIMER, ::dsn::TASK_PRIORITY_COMMON, THREAD_POOL_DEFAULT) +} } diff --git a/include/dsn/dist/failure_detector/fd.server.h b/include/dsn/dist/failure_detector/fd.server.h new file mode 100644 index 0000000000..a08a5289d8 --- /dev/null +++ b/include/dsn/dist/failure_detector/fd.server.h @@ -0,0 +1,61 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +# pragma once +# include +# include +# include + +namespace dsn { namespace fd { +class failure_detector_service + : public ::dsn::service::serverlet +{ +public: + failure_detector_service() : ::dsn::service::serverlet("failure_detector") {} + virtual ~failure_detector_service() {} + +protected: + // all service handlers to be implemented further + // RPC_FD_FAILURE_DETECTOR_PING + virtual void on_ping(const ::dsn::fd::beacon_msg& beacon, ::dsn::service::rpc_replier<::dsn::fd::beacon_ack>& reply) + { + std::cout << "... exec RPC_FD_FAILURE_DETECTOR_PING ... (not implemented) " << std::endl; + ::dsn::fd::beacon_ack resp; + reply(resp); + } + +public: + void open_service() + { + this->register_async_rpc_handler(RPC_FD_FAILURE_DETECTOR_PING, "ping", &failure_detector_service::on_ping); + } + + void close_service() + { + this->unregister_rpc_handler(RPC_FD_FAILURE_DETECTOR_PING); + } +}; + +} } \ No newline at end of file diff --git a/include/dsn/dist/failure_detector/fd.types.h b/include/dsn/dist/failure_detector/fd.types.h new file mode 100644 index 0000000000..b5211322bb --- /dev/null +++ b/include/dsn/dist/failure_detector/fd.types.h @@ -0,0 +1,80 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# pragma once +# include + +namespace dsn { namespace fd { + // ---------- beacon_msg ------------- + struct beacon_msg + { + int64_t time; + ::dsn::end_point from; + ::dsn::end_point to; + }; + + inline void marshall(::dsn::binary_writer& writer, const beacon_msg& val) + { + marshall(writer, val.time); + marshall(writer, val.from); + marshall(writer, val.to); + }; + + inline void unmarshall(::dsn::binary_reader& reader, __out_param beacon_msg& val) + { + unmarshall(reader, val.time); + unmarshall(reader, val.from); + unmarshall(reader, val.to); + }; + + // ---------- beacon_ack ------------- + struct beacon_ack + { + int64_t time; + ::dsn::end_point this_node; + ::dsn::end_point primary_node; + bool is_master; + bool allowed; + }; + + inline void marshall(::dsn::binary_writer& writer, const beacon_ack& val) + { + marshall(writer, val.time); + marshall(writer, val.this_node); + marshall(writer, val.primary_node); + marshall(writer, val.is_master); + marshall(writer, val.allowed); + }; + + inline void unmarshall(::dsn::binary_reader& reader, __out_param beacon_ack& val) + { + unmarshall(reader, val.time); + unmarshall(reader, val.this_node); + unmarshall(reader, val.primary_node); + unmarshall(reader, val.is_master); + unmarshall(reader, val.allowed); + }; + +} } diff --git a/include/dsn/dist/replication.h b/include/dsn/dist/replication.h new file mode 100644 index 0000000000..3bf2d7cdd1 --- /dev/null +++ b/include/dsn/dist/replication.h @@ -0,0 +1,33 @@ +/* + * The MIT 
License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#pragma once + +# include +# include +# include +# include + + diff --git a/include/dsn/dist/replication/meta_service_app.h b/include/dsn/dist/replication/meta_service_app.h new file mode 100644 index 0000000000..99294047b2 --- /dev/null +++ b/include/dsn/dist/replication/meta_service_app.h @@ -0,0 +1,54 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#pragma once + +# include + +class server_state; +class meta_service; + +namespace dsn { + namespace service { + + class meta_service_app : public service_app + { + public: + meta_service_app(service_app_spec* s); + + ~meta_service_app(); + + virtual error_code start(int argc, char** argv) override; + + virtual void stop(bool cleanup = false) override; + + private: + static server_state *_reliable_state; + meta_service* _service; + }; + + } +} + diff --git a/include/dsn/dist/replication/replication.codes.h b/include/dsn/dist/replication/replication.codes.h new file mode 100644 index 0000000000..cf262a7ae2 --- /dev/null +++ b/include/dsn/dist/replication/replication.codes.h @@ -0,0 +1,102 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#pragma once + +#include + +DEFINE_THREAD_POOL_CODE(THREAD_POOL_META_SERVER) +//DEFINE_THREAD_POOL_CODE(THREAD_POOL_REPLICATION) +DEFINE_THREAD_POOL_CODE(THREAD_POOL_LOCAL_APP) +DEFINE_THREAD_POOL_CODE(THREAD_POOL_REPLICATION_LONG) + +#define MAKE_EVENT_CODE(x, pri) DEFINE_TASK_CODE(x, pri, CURRENT_THREAD_POOL) +#define MAKE_EVENT_CODE_AIO(x, pri) DEFINE_TASK_CODE_AIO(x, pri, CURRENT_THREAD_POOL) +#define MAKE_EVENT_CODE_RPC(x, pri) DEFINE_TASK_CODE_RPC(x, pri, CURRENT_THREAD_POOL) + +// THREAD_POOL_DEFAULT +#define CURRENT_THREAD_POOL dsn::THREAD_POOL_DEFAULT +MAKE_EVENT_CODE(LPC_MUTATION_LOG_PENDING_TIMER, dsn::TASK_PRIORITY_COMMON) +MAKE_EVENT_CODE_AIO(LPC_ASYNC_READ_COMPLETE, dsn::TASK_PRIORITY_COMMON) +MAKE_EVENT_CODE_AIO(LPC_ASYNC_WRITE_COMPLETE, dsn::TASK_PRIORITY_COMMON) +MAKE_EVENT_CODE(LPC_REPLICA_STATE_CHANGE_NOTIFICATION, dsn::TASK_PRIORITY_COMMON) +MAKE_EVENT_CODE_RPC(RPC_REPORT, dsn::TASK_PRIORITY_COMMON) +MAKE_EVENT_CODE_RPC(RPC_TEST, dsn::TASK_PRIORITY_COMMON) +MAKE_EVENT_CODE_RPC(RPC_TEST2, dsn::TASK_PRIORITY_COMMON) +MAKE_EVENT_CODE_RPC(RPC_TEST3, dsn::TASK_PRIORITY_COMMON) +MAKE_EVENT_CODE_RPC(RPC_TEST4, dsn::TASK_PRIORITY_COMMON) +MAKE_EVENT_CODE(LPC_TEST, dsn::TASK_PRIORITY_COMMON) +MAKE_EVENT_CODE_RPC(RPC_TEST_AGENT_WRITE, dsn::TASK_PRIORITY_COMMON) +MAKE_EVENT_CODE_RPC(RPC_TEST_AGENT_READ, dsn::TASK_PRIORITY_COMMON) +MAKE_EVENT_CODE_AIO(LPC_AIO_TEST, dsn::TASK_PRIORITY_COMMON) +MAKE_EVENT_CODE_AIO(LPC_AIO_IMMEDIATE_CALLBACK, dsn::TASK_PRIORITY_COMMON) +#undef CURRENT_THREAD_POOL + +// THREAD_POOL_META_SERVER +#define CURRENT_THREAD_POOL THREAD_POOL_META_SERVER +MAKE_EVENT_CODE_RPC(RPC_CM_CALL, dsn::TASK_PRIORITY_COMMON) +MAKE_EVENT_CODE_RPC(RPC_CM_QUERY_PARTITION_CONFIG_BY_INDEX, dsn::TASK_PRIORITY_COMMON) +MAKE_EVENT_CODE_RPC(RPC_CM_QUERY_NODE_PARTITIONS, dsn::TASK_PRIORITY_COMMON) +MAKE_EVENT_CODE_RPC(RPC_CM_UPDATE_PARTITION_CONFIGURATION, dsn::TASK_PRIORITY_COMMON) +MAKE_EVENT_CODE_AIO(LPC_CM_LOG_UPDATE, dsn::TASK_PRIORITY_HIGH) 
+MAKE_EVENT_CODE(LPC_LBM_RUN, dsn::TASK_PRIORITY_COMMON) +MAKE_EVENT_CODE(LPC_QUERY_PN_DECREE, dsn::TASK_PRIORITY_COMMON) +#undef CURRENT_THREAD_POOL + +// THREAD_POOL_REPLICATION +#define CURRENT_THREAD_POOL THREAD_POOL_REPLICATION +MAKE_EVENT_CODE(LPC_MUTATION_PENDING_TIMER, dsn::TASK_PRIORITY_HIGH) +MAKE_EVENT_CODE(LPC_GROUP_CHECK, dsn::TASK_PRIORITY_COMMON) +MAKE_EVENT_CODE(LPC_CM_DISCONNECTED_SCATTER, dsn::TASK_PRIORITY_HIGH) +MAKE_EVENT_CODE(LPC_QUERY_NODE_CONFIGURATION_SCATTER, dsn::TASK_PRIORITY_HIGH) +MAKE_EVENT_CODE(LPC_LEARN_REMOTE_DELTA_FILES_COMPLETED, dsn::TASK_PRIORITY_HIGH) +MAKE_EVENT_CODE(LPC_SIM_UPDATE_PARTITION_CONFIGURATION_REPLY, dsn::TASK_PRIORITY_COMMON) +MAKE_EVENT_CODE_AIO(LPC_WRITE_REPLICATION_LOG, dsn::TASK_PRIORITY_HIGH) +MAKE_EVENT_CODE_AIO(LPC_LERARN_REMOTE_DISK_STATE, dsn::TASK_PRIORITY_HIGH) +MAKE_EVENT_CODE(LPC_QUERY_CONFIGURATION_ALL, dsn::TASK_PRIORITY_HIGH) +MAKE_EVENT_CODE_RPC(RPC_REPLICATION_CLIENT_WRITE, dsn::TASK_PRIORITY_COMMON) +MAKE_EVENT_CODE_RPC(RPC_CONFIG_PROPOSAL, dsn::TASK_PRIORITY_HIGH) +MAKE_EVENT_CODE_RPC(RPC_QUERY_PN_DECREE, dsn::TASK_PRIORITY_HIGH) +MAKE_EVENT_CODE_RPC(RPC_PREPARE, dsn::TASK_PRIORITY_HIGH) +MAKE_EVENT_CODE_RPC(RPC_GROUP_CHECK, dsn::TASK_PRIORITY_COMMON) +MAKE_EVENT_CODE_RPC(RPC_LEARN, dsn::TASK_PRIORITY_HIGH) +MAKE_EVENT_CODE_RPC(RPC_LEARN_COMPLETITION_NOTIFY, dsn::TASK_PRIORITY_HIGH) +MAKE_EVENT_CODE_RPC(RPC_LEARN_ADD_LEARNER, dsn::TASK_PRIORITY_HIGH) +MAKE_EVENT_CODE_RPC(RPC_REMOVE_REPLICA, dsn::TASK_PRIORITY_COMMON) +#undef CURRENT_THREAD_POOL + +// THREAD_POOL_LOCAL_APP +#define CURRENT_THREAD_POOL THREAD_POOL_LOCAL_APP +MAKE_EVENT_CODE(LPC_WRITE, dsn::TASK_PRIORITY_COMMON) +MAKE_EVENT_CODE_RPC(RPC_REPLICATION_CLIENT_READ, dsn::TASK_PRIORITY_COMMON) +#undef CURRENT_THREAD_POOL + +// THREAD_POOL_REPLICATION_LONG +#define CURRENT_THREAD_POOL THREAD_POOL_REPLICATION_LONG +MAKE_EVENT_CODE(LPC_LEARN_REMOTE_DELTA_FILES, dsn::TASK_PRIORITY_COMMON) 
+MAKE_EVENT_CODE(LPC_GARBAGE_COLLECT_LOGS_AND_REPLICAS, dsn::TASK_PRIORITY_COMMON) +MAKE_EVENT_CODE(LPC_OPEN_REPLICA, dsn::TASK_PRIORITY_COMMON) +MAKE_EVENT_CODE(LPC_CLOSE_REPLICA, dsn::TASK_PRIORITY_COMMON) +#undef CURRENT_THREAD_POOL diff --git a/include/dsn/dist/replication/replication_app_base.h b/include/dsn/dist/replication/replication_app_base.h new file mode 100644 index 0000000000..87227bf97c --- /dev/null +++ b/include/dsn/dist/replication/replication_app_base.h @@ -0,0 +1,161 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#pragma once + +// +// replication_app_base is the base class for all app to be replicated using +// this library +// + +# include +# include +# include +# include + +namespace dsn { namespace replication { + +using namespace ::dsn::service; + +class replication_app_base +{ +public: + template static replication_app_base* create(::dsn::replication::replica* replica, ::dsn::configuration_ptr& config) + { + return new T(replica, config); + } + +public: + replication_app_base(::dsn::replication::replica* replica, ::dsn::configuration_ptr& config); + virtual ~replication_app_base() {} + + // + // interfaces to be implemented by app + // all return values are error code + // + virtual int open(bool create_new) = 0; // singel threaded + virtual int close(bool clear_state) = 0; // must be thread-safe + + // update _last_durable_decree internally + virtual int flush(bool force) = 0; // must be thread-safe + + // + // helper routines to accelerate learning + // + virtual void prepare_learning_request(__out_param ::dsn::blob& learn_req) {}; + virtual int get_learn_state(::dsn::replication::decree start, const ::dsn::blob& learn_req, __out_param ::dsn::replication::learn_state& state) = 0; // must be thread-safe + virtual int apply_learn_state(::dsn::replication::learn_state& state) = 0; // must be thread-safe, and last_committed_decree must equal to last_durable_decree after learning + + // + // queries + // + virtual ::dsn::replication::decree last_committed_decree() const { return _last_committed_decree.load(); } + virtual ::dsn::replication::decree last_durable_decree() const { return _last_durable_decree.load(); } + +public: + // + // utility functions to be used by app + // + const std::string& dir() const {return _dir;} + +protected: + template + void register_async_rpc_handler( + task_code code, + const char* name, + void (T::*callback)(const TRequest&, rpc_replier&) + ); + + void unregister_rpc_handler(task_code code); + +private: + template + void 
internal_rpc_handler( + message_ptr& request, + message_ptr& response, + void (T::*callback)(const TRequest&, rpc_replier&) + ); + +private: + // routines for replica internal usage + friend class replica; + int write_internal(mutation_ptr& mu, bool ack_client); + int dispatch_rpc_call(int code, message_ptr& request, bool ack_client); + +private: + std::string _dir; + replica* _replica; + std::map > _handlers; + +protected: + std::atomic _last_committed_decree; + std::atomic _last_durable_decree; +}; + +typedef replication_app_base* (*replica_app_factory)(replica*, configuration_ptr&); +extern void register_replica_provider(replica_app_factory f, const char* name); + +template +inline void register_replica_provider(const char* name) +{ + register_replica_provider(&replication_app_base::template create, name); +} + +//------------------ inline implementation --------------------- +template +inline void replication_app_base::register_async_rpc_handler( + task_code code, + const char* name, + void (T::*callback)(const TRequest&, rpc_replier&) + ) +{ + _handlers[code] = std::bind( + &replication_app_base::internal_rpc_handler, + this, + std::placeholders::_1, + std::placeholders::_2, + callback + ); +} + +inline void replication_app_base::unregister_rpc_handler(task_code code) +{ + _handlers.erase(code); +} + +template +inline void replication_app_base::internal_rpc_handler( + message_ptr& request, + message_ptr& response, + void (T::*callback)(const TRequest&, rpc_replier&)) +{ + TRequest req; + unmarshall(request->reader(), req); + + rpc_replier replier(request, response); + (static_cast(this)->*callback)(req, replier); +} + +}} // namespace diff --git a/include/dsn/dist/replication/replication_app_client_base.h b/include/dsn/dist/replication/replication_app_client_base.h new file mode 100644 index 0000000000..5a5edb1ab5 --- /dev/null +++ b/include/dsn/dist/replication/replication_app_client_base.h @@ -0,0 +1,403 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 
2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#pragma once + +// +// replication_app_client_base is the base class for clients for +// all app to be replicated using this library +// + +# include +# include +# include +# include + +namespace dsn { namespace replication { + + DEFINE_ERR_CODE(ERR_REPLICATION_FAILURE) + +#pragma pack(push, 4) + class replication_app_client_base : public virtual servicelet + { + public: + static void load_meta_servers(configuration_ptr& cf, __out_param std::vector& servers); + + public: + replication_app_client_base( + const std::vector& meta_servers, + const char* app_name + ); + + ~replication_app_client_base(); + + template + rpc_response_task_ptr write( + int partition_index, + task_code code, + std::shared_ptr& req, + + // callback + T* owner, + void (T::*callback)(error_code, std::shared_ptr&, std::shared_ptr&), + + // other specific parameters + int timeout_milliseconds = 0, + int reply_hash = 0 + ) + { + timeout_milliseconds = (timeout_milliseconds != 0 ? timeout_milliseconds : task_spec::get(code)->rpc_timeout_milliseconds); + message_ptr msg = message::create_request(RPC_REPLICATION_CLIENT_WRITE, timeout_milliseconds); + + rpc_response_task_ptr task; + if (callback == nullptr) + task = new rpc_response_task_empty(msg); + else + task = new ::dsn::service::rpc::internal_use_only::service_rpc_response_task1( + owner, + req, + callback, + msg + ); + auto rc = create_write_context(partition_index, code, task, reply_hash); + marshall(msg->writer(), *req); + call(rc); + return task; + } + + template + rpc_response_task_ptr write( + int partition_index, + task_code code, + std::shared_ptr& req, + + // callback + servicelet* owner, + std::function&, std::shared_ptr&)> callback, + + // other specific parameters + int timeout_milliseconds = 0, + int reply_hash = 0 + ) + { + timeout_milliseconds = (timeout_milliseconds != 0 ? 
timeout_milliseconds : task_spec::get(code)->rpc_timeout_milliseconds); + message_ptr msg = message::create_request(RPC_REPLICATION_CLIENT_WRITE, timeout_milliseconds); + + rpc_response_task_ptr task; + if (callback == nullptr) + task = new rpc_response_task_empty(msg); + else + task = new ::dsn::service::rpc::internal_use_only::service_rpc_response_task2( + owner, + req, + callback, + msg + ); + auto rc = create_write_context(partition_index, code, task, reply_hash); + marshall(msg->writer(), *req); + call(rc); + return task; + } + + template + rpc_response_task_ptr write( + int partition_index, + task_code code, + const TRequest& req, + + // callback + T* owner, + void (T::*callback)(error_code, const TResponse&, void*), + void* context, + + // other specific parameters + int timeout_milliseconds = 0, + int reply_hash = 0 + ) + { + timeout_milliseconds = (timeout_milliseconds != 0 ? timeout_milliseconds : task_spec::get(code)->rpc_timeout_milliseconds); + message_ptr msg = message::create_request(RPC_REPLICATION_CLIENT_WRITE, timeout_milliseconds); + + rpc_response_task_ptr task; + if (callback == nullptr) + task = new rpc_response_task_empty(msg); + else + task = new ::dsn::service::rpc::internal_use_only::service_rpc_response_task5( + owner, + callback, + context, + msg + ); + auto rc = create_write_context(partition_index, code, task, reply_hash); + marshall(msg->writer(), req); + call(rc); + return task; + } + + template + rpc_response_task_ptr write( + int partition_index, + task_code code, + const TRequest& req, + + // callback + servicelet* owner, + std::function callback, + void* context, + + // other specific parameters + int timeout_milliseconds = 0, + int reply_hash = 0 + ) + { + timeout_milliseconds = (timeout_milliseconds != 0 ? 
timeout_milliseconds : task_spec::get(code)->rpc_timeout_milliseconds); + message_ptr msg = message::create_request(RPC_REPLICATION_CLIENT_WRITE, timeout_milliseconds); + + rpc_response_task_ptr task; + if (callback == nullptr) + task = new rpc_response_task_empty(msg); + else + task = new ::dsn::service::rpc::internal_use_only::service_rpc_response_task3( + owner, + callback, + context, + msg + ); + auto rc = create_write_context(partition_index, code, task, reply_hash); + marshall(msg->writer(), req); + call(rc); + return task; + } + + template + rpc_response_task_ptr read( + int partition_index, + task_code code, + std::shared_ptr& req, + + // callback + T* owner, + void (T::*callback)(error_code, std::shared_ptr&, std::shared_ptr&), + + // other specific parameters + int timeout_milliseconds = 0, + int reply_hash = 0, + read_semantic_t read_semantic = ReadOutdated, + decree snapshot_decree = invalid_decree // only used when ReadSnapshot + ) + { + timeout_milliseconds = (timeout_milliseconds != 0 ? 
timeout_milliseconds : task_spec::get(code)->rpc_timeout_milliseconds); + message_ptr msg = message::create_request(RPC_REPLICATION_CLIENT_READ, timeout_milliseconds); + + rpc_response_task_ptr task; + if (callback == nullptr) + task = new rpc_response_task_empty(msg); + else + task = new ::dsn::service::rpc::internal_use_only::service_rpc_response_task1( + owner, + req, + callback, + msg + ); + auto rc = create_read_context(partition_index, code, task, read_semantic, snapshot_decree, reply_hash); + marshall(msg->writer(), *req); + call(rc); + return task; + } + + template + rpc_response_task_ptr read( + int partition_index, + task_code code, + std::shared_ptr& req, + + // callback + servicelet* owner, + std::function&, std::shared_ptr&)> callback, + + // other specific parameters + int timeout_milliseconds = 0, + int reply_hash = 0, + read_semantic_t read_semantic = ReadOutdated, + decree snapshot_decree = invalid_decree // only used when ReadSnapshot + ) + { + timeout_milliseconds = (timeout_milliseconds != 0 ? 
timeout_milliseconds : task_spec::get(code)->rpc_timeout_milliseconds); + message_ptr msg = message::create_request(RPC_REPLICATION_CLIENT_READ, timeout_milliseconds); + + rpc_response_task_ptr task; + if (callback == nullptr) + task = new rpc_response_task_empty(msg); + else + task = new ::dsn::service::rpc::internal_use_only::service_rpc_response_task2( + owner, + req, + callback, + msg + ); + auto rc = create_read_context(partition_index, code, task, read_semantic, snapshot_decree, reply_hash); + marshall(msg->writer(), *req); + call(rc); + return task; + } + /* read (member-function callback with context): takes the request by const reference plus an opaque void pointer context. A non-null callback is wrapped in service_rpc_response_task5 together with owner, context and the message; a null callback produces an rpc_response_task_empty. req is marshalled directly (no dereference), then the read context is passed to call() and the task is returned. NOTE(review): the template parameter list is missing in this text; confirm against the original header. */ + template + rpc_response_task_ptr read( + int partition_index, + task_code code, + const TRequest& req, + + // callback + T* owner, + void (T::*callback)(error_code, const TResponse&, void*), + void* context, + + // other specific parameters + int timeout_milliseconds = 0, + int reply_hash = 0, + read_semantic_t read_semantic = ReadOutdated, + decree snapshot_decree = invalid_decree // only used when ReadSnapshot + ) + { + timeout_milliseconds = (timeout_milliseconds != 0 ? 
timeout_milliseconds : task_spec::get(code)->rpc_timeout_milliseconds); + message_ptr msg = message::create_request(RPC_REPLICATION_CLIENT_READ, timeout_milliseconds); + + rpc_response_task_ptr task; + if (callback == nullptr) + task = new rpc_response_task_empty(msg); + else + task = new ::dsn::service::rpc::internal_use_only::service_rpc_response_task5( + owner, + callback, + context, + msg + ); + auto rc = create_read_context(partition_index, code, task, read_semantic, snapshot_decree, reply_hash); + marshall(msg->writer(), req); + call(rc); + return task; + } + /* read (std::function callback with context): owner is a servicelet pointer; a non-null callback is wrapped in service_rpc_response_task3 together with owner, context and the message, and a null callback produces an rpc_response_task_empty. The request is taken by const reference and marshalled directly before the read context is passed to call(). NOTE(review): the template parameter list and the std::function signature are missing in this text; confirm against the original header. */ + template + rpc_response_task_ptr read( + int partition_index, + task_code code, + const TRequest& req, + + // callback + servicelet* owner, + std::function callback, + void* context, + + // other specific parameters + int timeout_milliseconds = 0, + int reply_hash = 0, + read_semantic_t read_semantic = ReadOutdated, + decree snapshot_decree = invalid_decree // only used when ReadSnapshot + ) + { + timeout_milliseconds = (timeout_milliseconds != 0 ? 
timeout_milliseconds : task_spec::get(code)->rpc_timeout_milliseconds); + message_ptr msg = message::create_request(RPC_REPLICATION_CLIENT_READ, timeout_milliseconds); + + rpc_response_task_ptr task; + if (callback == nullptr) + task = new rpc_response_task_empty(msg); + else + task = new ::dsn::service::rpc::internal_use_only::service_rpc_response_task3( + owner, + callback, + context, + msg + ); + auto rc = create_read_context(partition_index, code, task, read_semantic, snapshot_decree, reply_hash); + marshall(msg->writer(), req); + call(rc); + return task; + } + + // get read address policy + virtual end_point get_read_address(read_semantic_t semantic, const partition_configuration& config); + /* request_context: bookkeeping for one client request. It records the partition index, the response task handed back to the caller, a read header and a write header with an is_read flag (presumably selecting which header applies; TODO confirm), the header position and a timeout timer task. */ + private: + struct request_context + { + int partition_index; + rpc_response_task_ptr callback_task; + read_request_header read_header; + write_request_header write_header; + bool is_read; + uint16_t header_pos; // write header after body is written + task_ptr timeout_timer; // when partition config is unknown at the first place + }; + /* partition_context: a configuration-query task plus a list of requests. NOTE(review): the element type of the std::list is missing in this text; confirm against the original header. */ + struct partition_context + { + rpc_response_task_ptr query_config_task; + std::list requests; + }; + /* pending_requests: map type of the _pending_requests member declared next to _requests_lock. NOTE(review): the key and value types of the std::map are missing in this text. */ + typedef std::map pending_requests; + + mutable zlock _requests_lock; + pending_requests _pending_requests; + /* Factories for request_context objects; the read() overloads call create_read_context and pass the result to call(). */ + private: + request_context* create_write_context( + int partition_index, + task_code code, + rpc_response_task_ptr callback, + int reply_hash = 0 + ); + + request_context* create_read_context( + int partition_index, + task_code code, + rpc_response_task_ptr callback, + read_semantic_t read_semantic = ReadOutdated, + decree snapshot_decree = invalid_decree, // only used when ReadSnapshot + int reply_hash = 0 + ); + /* Application name, meta server list and cached configuration state. NOTE(review): the element types of _meta_servers and _config_cache are missing in this text. */ + private: + std::string _app_name; + std::vector _meta_servers; + + mutable zrwlock _config_lock; + std::map _config_cache; + int _app_id; + end_point _last_contact_point; + + private: + void call(request_context* request, bool no_delay = true); + error_code get_address(int pidx, bool is_write, __out_param end_point& 
addr, __out_param int& app_id, read_semantic_t semantic = read_semantic_t::ReadLastUpdate); + void on_user_request_timeout(request_context* rc); + void query_partition_configuration_reply(error_code err, message_ptr& request, message_ptr& response, int pidx); + void replica_rw_reply(error_code err, message_ptr& request, message_ptr& response, request_context* rc); + void end_request(request_context* request, error_code err, message_ptr& resp); + void clear_all_pending_tasks(); + }; +#pragma pack(pop) + /* The pack(pop) above restores the packing state saved by a matching push, which is presumably earlier in this header (not visible here; TODO confirm). The braces below close both enclosing namespaces. */ +}} // namespace diff --git a/include/dsn/dist/replication/replication_other_types.h b/include/dsn/dist/replication/replication_other_types.h new file mode 100644 index 0000000000..128187a373 --- /dev/null +++ b/include/dsn/dist/replication/replication_other_types.h @@ -0,0 +1,58 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +/** + * Autogenerated by Thrift Compiler (@PACKAGE_VERSION@) + * + * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING + * @generated + */ +#ifndef replication_OTHER_TYPES_H +#define replication_OTHER_TYPES_H + /* Basic identifier types, sentinel values and intrusive-pointer aliases for the dsn::replication namespace. */ +namespace dsn { + namespace replication { + /* Fixed-width integer aliases: app_id is 32-bit, ballot and decree are 64-bit. */ + typedef int32_t app_id; + typedef int64_t ballot; + typedef int64_t decree; + /* Sentinels: -1 denotes an invalid ballot, decree or offset. */ + #define invalid_ballot ((ballot)-1LL) + #define invalid_decree ((decree)-1LL) + #define invalid_offset (-1LL) + /* Forward declarations with intrusive-pointer aliases. Each alias names the class declared on the line before it; the template argument was missing from the typedefs, which made them ill-formed. */ + class replica; + typedef boost::intrusive_ptr<replica> replica_ptr; + + class replica_stub; + typedef boost::intrusive_ptr<replica_stub> replica_stub_ptr; + + class mutation; + typedef boost::intrusive_ptr<mutation> mutation_ptr; + + } +} // namespace + +#endif diff --git a/include/dsn/dist/replication/replication_service_app.h b/include/dsn/dist/replication/replication_service_app.h new file mode 100644 index 0000000000..d1f55dd5be --- /dev/null +++ b/include/dsn/dist/replication/replication_service_app.h @@ -0,0 +1,53 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#pragma once + /* NOTE(review): the header names after both include directives are missing in this text; restore them from the original file. */ +#include +#include + /* NOTE(review): these using-directives are at header scope, so they take effect in every file that includes this header. */ +using namespace dsn::service; +using namespace dsn; + +namespace dsn { namespace replication { + /* replication_service_app: a service_app subclass constructed from a service_app_spec pointer. It overrides start(argc, argv) and stop(cleanup) and holds a replica_stub_ptr member; the member function bodies are not defined in this header. */ +class replication_service_app : public dsn::service::service_app +{ +public: + replication_service_app(service_app_spec* s); + + ~replication_service_app(void); + + virtual error_code start(int argc, char** argv) override; + + virtual void stop(bool cleanup = false) override; + +private: + replica_stub_ptr _stub; +}; + +}} + + diff --git a/include/dsn/dist/replication/replication_types.h b/include/dsn/dist/replication/replication_types.h new file mode 100644 index 0000000000..59b973f14d --- /dev/null +++ b/include/dsn/dist/replication/replication_types.h @@ -0,0 +1,1384 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +/** + * Autogenerated by Thrift Compiler (@PACKAGE_VERSION@) + * + * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING + * @generated + */ +#ifndef replication_TYPES_H +#define replication_TYPES_H + /* NOTE(review): the header name after the include directive is missing in this text; restore it from the original file. */ +# include + +DEFINE_THREAD_POOL_CODE(THREAD_POOL_REPLICATION) + + + +namespace dsn { namespace replication { + /* partition_status: enumerators PS_INACTIVE (0) through PS_INVALID (5); presumably the role or state of a replica within its partition (TODO confirm). Registered for serialization via DEFINE_POD_SERIALIZATION. */ +enum partition_status { + PS_INACTIVE = 0, + PS_ERROR = 1, + PS_PRIMARY = 2, + PS_SECONDARY = 3, + PS_POTENTIAL_SECONDARY = 4, + PS_INVALID = 5 +}; + +DEFINE_POD_SERIALIZATION(partition_status); + /* read_semantic_t: ReadLastUpdate (0), ReadOutdated (1), ReadSnapshot (2); the read semantic a client passes with a read request. */ +enum read_semantic_t { + ReadLastUpdate = 0, + ReadOutdated = 1, + ReadSnapshot = 2 +}; + +DEFINE_POD_SERIALIZATION(read_semantic_t); + /* learner_status: LearningWithoutPrepare (0) through Learning_INVALID (4). */ +enum learner_status { + LearningWithoutPrepare = 0, + LearningWithPrepare = 1, + LearningSucceeded = 2, + LearningFailed = 3, + Learning_INVALID = 4 +}; + +DEFINE_POD_SERIALIZATION(learner_status); + /* config_type: CT_NONE (0) through CT_UPGRADE_TO_SECONDARY (6); presumably the kind of configuration change being requested (TODO confirm). */ +enum config_type { + CT_NONE = 0, + CT_ASSIGN_PRIMARY = 1, + CT_ADD_SECONDARY = 2, + CT_DOWNGRADE_TO_SECONDARY = 3, + CT_DOWNGRADE_TO_INACTIVE = 4, + CT_REMOVE = 5, + CT_UPGRADE_TO_SECONDARY = 6 +}; + +DEFINE_POD_SERIALIZATION(config_type); + /* Forward declarations of the struct-like classes of this header. */ +class global_partition_id; + +class mutation_header; + +class mutation_data; + +class partition_configuration; + +class replica_configuration; + +class prepare_msg; + +class read_request_header; + +class write_request_header; + +class rw_response_header; + +class prepare_ack; + +class learn_state; + +class learn_request; + +class learn_response; + +class group_check_request; + +class 
group_check_response; + +class meta_request_header; + +class meta_response_header; + +class configuration_update_request; + +class configuration_update_response; + +class configuration_proposal_request; + +class configuration_query_by_node_request; + +class configuration_query_by_node_response; + +class configuration_query_by_index_request; + +class configuration_query_by_index_response; + +class query_replica_decree_request; + +class query_replica_decree_response; + + /* global_partition_id: two int32 fields, app_id and pidx, both defaulting to -1. operator== compares both fields; copy operations, the destructor, operator< and swap are declared only. unmarshall and marshall process app_id first, then pidx. */ +class global_partition_id { + public: + + global_partition_id(const global_partition_id&); + global_partition_id& operator=(const global_partition_id&); + global_partition_id() : app_id(-1), pidx(-1) { + } + + virtual ~global_partition_id() throw(); + int32_t app_id; + int32_t pidx; + bool operator == (const global_partition_id & rhs) const + { + if (!(app_id == rhs.app_id)) + return false; + if (!(pidx == rhs.pidx)) + return false; + return true; + } + bool operator != (const global_partition_id &rhs) const { + return !(*this == rhs); + } + + bool operator < (const global_partition_id & ) const; + + +}; + +void swap(global_partition_id &a, global_partition_id &b); + +inline void unmarshall(::dsn::binary_reader& reader, __out_param global_partition_id& val) { + ::dsn::unmarshall(reader, val.app_id); + ::dsn::unmarshall(reader, val.pidx); +} + +inline void marshall(::dsn::binary_writer& writer, const global_partition_id& val, uint16_t pos = 0xffff) { + ::dsn::marshall(writer, val.app_id, pos); + ::dsn::marshall(writer, val.pidx, pos); +} + + /* mutation_header: a gpid plus the int64 fields ballot, decree, log_offset and last_committed_decree, all set to 0 by the default constructor. Serialized in declaration order, gpid first. */ +class mutation_header { + public: + + mutation_header(const mutation_header&); + mutation_header& operator=(const mutation_header&); + mutation_header() : ballot(0), decree(0), log_offset(0), last_committed_decree(0) { + } + + virtual ~mutation_header() throw(); + global_partition_id gpid; + int64_t ballot; + int64_t decree; + int64_t log_offset; + int64_t last_committed_decree; + bool operator == (const mutation_header & rhs) const + { + if (!(gpid == 
rhs.gpid)) + return false; + if (!(ballot == rhs.ballot)) + return false; + if (!(decree == rhs.decree)) + return false; + if (!(log_offset == rhs.log_offset)) + return false; + if (!(last_committed_decree == rhs.last_committed_decree)) + return false; + return true; + } + bool operator != (const mutation_header &rhs) const { + return !(*this == rhs); + } + + bool operator < (const mutation_header & ) const; + + +}; + +void swap(mutation_header &a, mutation_header &b); + +inline void unmarshall(::dsn::binary_reader& reader, __out_param mutation_header& val) { + unmarshall(reader, val.gpid); + ::dsn::unmarshall(reader, val.ballot); + ::dsn::unmarshall(reader, val.decree); + ::dsn::unmarshall(reader, val.log_offset); + ::dsn::unmarshall(reader, val.last_committed_decree); +} + +inline void marshall(::dsn::binary_writer& writer, const mutation_header& val, uint16_t pos = 0xffff) { + marshall(writer, val.gpid, pos); + ::dsn::marshall(writer, val.ballot, pos); + ::dsn::marshall(writer, val.decree, pos); + ::dsn::marshall(writer, val.log_offset, pos); + ::dsn::marshall(writer, val.last_committed_decree, pos); +} + + /* mutation_data: a mutation_header (header) and a vector of ::dsn::blob (updates). operator== compares both members; serialization handles header first, then updates. */ +class mutation_data { + public: + + mutation_data(const mutation_data&); + mutation_data& operator=(const mutation_data&); + mutation_data() { + } + + virtual ~mutation_data() throw(); + mutation_header header; + std::vector< ::dsn::blob> updates; + bool operator == (const mutation_data & rhs) const + { + if (!(header == rhs.header)) + return false; + if (!(updates == rhs.updates)) + return false; + return true; + } + bool operator != (const mutation_data &rhs) const { + return !(*this == rhs); + } + + bool operator < (const mutation_data & ) const; + + +}; + +void swap(mutation_data &a, mutation_data &b); + +inline void unmarshall(::dsn::binary_reader& reader, __out_param mutation_data& val) { + unmarshall(reader, val.header); + unmarshall(reader, val.updates); +} + +inline void marshall(::dsn::binary_writer& writer, const mutation_data& val, uint16_t pos = 
0xffff) { + marshall(writer, val.header, pos); + marshall(writer, val.updates, pos); +} + + /* partition_configuration: app_type, gpid, ballot, max_replica_count, the primary end_point, the secondaries and drop_outs end_point vectors, and last_committed_decree. The default constructor sets ballot, max_replica_count and last_committed_decree to 0. operator== compares every field; serialization follows declaration order. */ +class partition_configuration { + public: + + partition_configuration(const partition_configuration&); + partition_configuration& operator=(const partition_configuration&); + partition_configuration() : app_type(), ballot(0), max_replica_count(0), last_committed_decree(0) { + } + + virtual ~partition_configuration() throw(); + std::string app_type; + global_partition_id gpid; + int64_t ballot; + int32_t max_replica_count; + ::dsn::end_point primary; + std::vector< ::dsn::end_point> secondaries; + std::vector< ::dsn::end_point> drop_outs; + int64_t last_committed_decree; + bool operator == (const partition_configuration & rhs) const + { + if (!(app_type == rhs.app_type)) + return false; + if (!(gpid == rhs.gpid)) + return false; + if (!(ballot == rhs.ballot)) + return false; + if (!(max_replica_count == rhs.max_replica_count)) + return false; + if (!(primary == rhs.primary)) + return false; + if (!(secondaries == rhs.secondaries)) + return false; + if (!(drop_outs == rhs.drop_outs)) + return false; + if (!(last_committed_decree == rhs.last_committed_decree)) + return false; + return true; + } + bool operator != (const partition_configuration &rhs) const { + return !(*this == rhs); + } + + bool operator < (const partition_configuration & ) const; + + +}; + +void swap(partition_configuration &a, partition_configuration &b); + +inline void unmarshall(::dsn::binary_reader& reader, __out_param partition_configuration& val) { + ::dsn::unmarshall(reader, val.app_type); + unmarshall(reader, val.gpid); + ::dsn::unmarshall(reader, val.ballot); + ::dsn::unmarshall(reader, val.max_replica_count); + unmarshall(reader, val.primary); + unmarshall(reader, val.secondaries); + unmarshall(reader, val.drop_outs); + ::dsn::unmarshall(reader, val.last_committed_decree); +} + +inline void marshall(::dsn::binary_writer& writer, const partition_configuration& val, uint16_t pos = 0xffff) { + 
::dsn::marshall(writer, val.app_type, pos); + marshall(writer, val.gpid, pos); + ::dsn::marshall(writer, val.ballot, pos); + ::dsn::marshall(writer, val.max_replica_count, pos); + marshall(writer, val.primary, pos); + marshall(writer, val.secondaries, pos); + marshall(writer, val.drop_outs, pos); + ::dsn::marshall(writer, val.last_committed_decree, pos); +} + + /* replica_configuration: gpid, ballot, the primary end_point and a partition_status. The default constructor sets ballot to 0 and status to enumerator value 0, both in the initializer list and again in the constructor body. */ +class replica_configuration { + public: + + replica_configuration(const replica_configuration&); + replica_configuration& operator=(const replica_configuration&); + replica_configuration() : ballot(0), status((partition_status)0) { + status = (partition_status)0; + + } + + virtual ~replica_configuration() throw(); + global_partition_id gpid; + int64_t ballot; + ::dsn::end_point primary; + partition_status status; + bool operator == (const replica_configuration & rhs) const + { + if (!(gpid == rhs.gpid)) + return false; + if (!(ballot == rhs.ballot)) + return false; + if (!(primary == rhs.primary)) + return false; + if (!(status == rhs.status)) + return false; + return true; + } + bool operator != (const replica_configuration &rhs) const { + return !(*this == rhs); + } + + bool operator < (const replica_configuration & ) const; + + +}; + +void swap(replica_configuration &a, replica_configuration &b); + +inline void unmarshall(::dsn::binary_reader& reader, __out_param replica_configuration& val) { + unmarshall(reader, val.gpid); + ::dsn::unmarshall(reader, val.ballot); + unmarshall(reader, val.primary); + unmarshall(reader, val.status); +} + +inline void marshall(::dsn::binary_writer& writer, const replica_configuration& val, uint16_t pos = 0xffff) { + marshall(writer, val.gpid, pos); + ::dsn::marshall(writer, val.ballot, pos); + marshall(writer, val.primary, pos); + marshall(writer, val.status, pos); +} + + /* prepare_msg: a replica_configuration (config) together with a mutation_data (mu); serialized config first, then mu. */ +class prepare_msg { + public: + + prepare_msg(const prepare_msg&); + prepare_msg& operator=(const prepare_msg&); + prepare_msg() { + } + + virtual ~prepare_msg() throw(); + replica_configuration config; 
+ mutation_data mu; + bool operator == (const prepare_msg & rhs) const + { + if (!(config == rhs.config)) + return false; + if (!(mu == rhs.mu)) + return false; + return true; + } + bool operator != (const prepare_msg &rhs) const { + return !(*this == rhs); + } + + bool operator < (const prepare_msg & ) const; + + +}; + +void swap(prepare_msg &a, prepare_msg &b); + +inline void unmarshall(::dsn::binary_reader& reader, __out_param prepare_msg& val) { + unmarshall(reader, val.config); + unmarshall(reader, val.mu); +} + +inline void marshall(::dsn::binary_writer& writer, const prepare_msg& val, uint16_t pos = 0xffff) { + marshall(writer, val.config, pos); + marshall(writer, val.mu, pos); +} + + /* read_request_header: gpid, an int32 code, a read_semantic_t and an int64 version_decree. Defaults: code 0, semantic enumerator value 0, version_decree -1. Serialized in declaration order. */ +class read_request_header { + public: + + read_request_header(const read_request_header&); + read_request_header& operator=(const read_request_header&); + read_request_header() : code(0), semantic((read_semantic_t)0), version_decree(-1LL) { + semantic = (read_semantic_t)0; + + } + + virtual ~read_request_header() throw(); + global_partition_id gpid; + int32_t code; + read_semantic_t semantic; + int64_t version_decree; + bool operator == (const read_request_header & rhs) const + { + if (!(gpid == rhs.gpid)) + return false; + if (!(code == rhs.code)) + return false; + if (!(semantic == rhs.semantic)) + return false; + if (!(version_decree == rhs.version_decree)) + return false; + return true; + } + bool operator != (const read_request_header &rhs) const { + return !(*this == rhs); + } + + bool operator < (const read_request_header & ) const; + + +}; + +void swap(read_request_header &a, read_request_header &b); + +inline void unmarshall(::dsn::binary_reader& reader, __out_param read_request_header& val) { + unmarshall(reader, val.gpid); + ::dsn::unmarshall(reader, val.code); + unmarshall(reader, val.semantic); + ::dsn::unmarshall(reader, val.version_decree); +} + +inline void marshall(::dsn::binary_writer& writer, const read_request_header& val, uint16_t pos = 0xffff) { + 
marshall(writer, val.gpid, pos); + ::dsn::marshall(writer, val.code, pos); + marshall(writer, val.semantic, pos); + ::dsn::marshall(writer, val.version_decree, pos); +} + + /* write_request_header: gpid and an int32 code that defaults to 0; serialized gpid first, then code. */ +class write_request_header { + public: + + write_request_header(const write_request_header&); + write_request_header& operator=(const write_request_header&); + write_request_header() : code(0) { + } + + virtual ~write_request_header() throw(); + global_partition_id gpid; + int32_t code; + bool operator == (const write_request_header & rhs) const + { + if (!(gpid == rhs.gpid)) + return false; + if (!(code == rhs.code)) + return false; + return true; + } + bool operator != (const write_request_header &rhs) const { + return !(*this == rhs); + } + + bool operator < (const write_request_header & ) const; + + +}; + +void swap(write_request_header &a, write_request_header &b); + +inline void unmarshall(::dsn::binary_reader& reader, __out_param write_request_header& val) { + unmarshall(reader, val.gpid); + ::dsn::unmarshall(reader, val.code); +} + +inline void marshall(::dsn::binary_writer& writer, const write_request_header& val, uint16_t pos = 0xffff) { + marshall(writer, val.gpid, pos); + ::dsn::marshall(writer, val.code, pos); +} + + /* rw_response_header: a single int32 err field that defaults to 0. */ +class rw_response_header { + public: + + rw_response_header(const rw_response_header&); + rw_response_header& operator=(const rw_response_header&); + rw_response_header() : err(0) { + } + + virtual ~rw_response_header() throw(); + int32_t err; + bool operator == (const rw_response_header & rhs) const + { + if (!(err == rhs.err)) + return false; + return true; + } + bool operator != (const rw_response_header &rhs) const { + return !(*this == rhs); + } + + bool operator < (const rw_response_header & ) const; + + +}; + +void swap(rw_response_header &a, rw_response_header &b); + +inline void unmarshall(::dsn::binary_reader& reader, __out_param rw_response_header& val) { + ::dsn::unmarshall(reader, val.err); +} + +inline void marshall(::dsn::binary_writer& writer, const 
rw_response_header& val, uint16_t pos = 0xffff) { + ::dsn::marshall(writer, val.err, pos); +} + + /* prepare_ack: gpid, err, ballot, decree, last_committed_decree_in_app and last_committed_decree_in_prepare_list; the default constructor sets every numeric field to 0. Serialized in declaration order, gpid first. */ +class prepare_ack { + public: + + prepare_ack(const prepare_ack&); + prepare_ack& operator=(const prepare_ack&); + prepare_ack() : err(0), ballot(0), decree(0), last_committed_decree_in_app(0), last_committed_decree_in_prepare_list(0) { + } + + virtual ~prepare_ack() throw(); + global_partition_id gpid; + int32_t err; + int64_t ballot; + int64_t decree; + int64_t last_committed_decree_in_app; + int64_t last_committed_decree_in_prepare_list; + bool operator == (const prepare_ack & rhs) const + { + if (!(gpid == rhs.gpid)) + return false; + if (!(err == rhs.err)) + return false; + if (!(ballot == rhs.ballot)) + return false; + if (!(decree == rhs.decree)) + return false; + if (!(last_committed_decree_in_app == rhs.last_committed_decree_in_app)) + return false; + if (!(last_committed_decree_in_prepare_list == rhs.last_committed_decree_in_prepare_list)) + return false; + return true; + } + bool operator != (const prepare_ack &rhs) const { + return !(*this == rhs); + } + + bool operator < (const prepare_ack & ) const; + + +}; + +void swap(prepare_ack &a, prepare_ack &b); + +inline void unmarshall(::dsn::binary_reader& reader, __out_param prepare_ack& val) { + unmarshall(reader, val.gpid); + ::dsn::unmarshall(reader, val.err); + ::dsn::unmarshall(reader, val.ballot); + ::dsn::unmarshall(reader, val.decree); + ::dsn::unmarshall(reader, val.last_committed_decree_in_app); + ::dsn::unmarshall(reader, val.last_committed_decree_in_prepare_list); +} + +inline void marshall(::dsn::binary_writer& writer, const prepare_ack& val, uint16_t pos = 0xffff) { + marshall(writer, val.gpid, pos); + ::dsn::marshall(writer, val.err, pos); + ::dsn::marshall(writer, val.ballot, pos); + ::dsn::marshall(writer, val.decree, pos); + ::dsn::marshall(writer, val.last_committed_decree_in_app, pos); + ::dsn::marshall(writer, val.last_committed_decree_in_prepare_list, pos); +} + + /* learn_state: a ::dsn::blob (meta) and a vector (files). NOTE(review): the element type of the files vector is missing in this text; confirm against the original header. */ +class learn_state { + 
public: + + learn_state(const learn_state&); + learn_state& operator=(const learn_state&); + learn_state() { + } + + virtual ~learn_state() throw(); + ::dsn::blob meta; + std::vector files; + bool operator == (const learn_state & rhs) const + { + if (!(meta == rhs.meta)) + return false; + if (!(files == rhs.files)) + return false; + return true; + } + bool operator != (const learn_state &rhs) const { + return !(*this == rhs); + } + + bool operator < (const learn_state & ) const; + + +}; + +void swap(learn_state &a, learn_state &b); + +inline void unmarshall(::dsn::binary_reader& reader, __out_param learn_state& val) { + unmarshall(reader, val.meta); + unmarshall(reader, val.files); +} + +inline void marshall(::dsn::binary_writer& writer, const learn_state& val, uint16_t pos = 0xffff) { + marshall(writer, val.meta, pos); + marshall(writer, val.files, pos); +} + + /* learn_request: gpid, the learner end_point, signature, last_committed_decree_in_app, last_committed_decree_in_prepare_list and an app_specific_learn_request blob. The default constructor sets the three int64 fields to 0. */ +class learn_request { + public: + + learn_request(const learn_request&); + learn_request& operator=(const learn_request&); + learn_request() : signature(0), last_committed_decree_in_app(0), last_committed_decree_in_prepare_list(0) { + } + + virtual ~learn_request() throw(); + global_partition_id gpid; + ::dsn::end_point learner; + int64_t signature; + int64_t last_committed_decree_in_app; + int64_t last_committed_decree_in_prepare_list; + ::dsn::blob app_specific_learn_request; + bool operator == (const learn_request & rhs) const + { + if (!(gpid == rhs.gpid)) + return false; + if (!(learner == rhs.learner)) + return false; + if (!(signature == rhs.signature)) + return false; + if (!(last_committed_decree_in_app == rhs.last_committed_decree_in_app)) + return false; + if (!(last_committed_decree_in_prepare_list == rhs.last_committed_decree_in_prepare_list)) + return false; + if (!(app_specific_learn_request == rhs.app_specific_learn_request)) + return false; + return true; + } + bool operator != (const learn_request &rhs) const { + return !(*this == rhs); + } + + bool operator < (const learn_request & ) 
const; + + +}; + +void swap(learn_request &a, learn_request &b); + +inline void unmarshall(::dsn::binary_reader& reader, __out_param learn_request& val) { + unmarshall(reader, val.gpid); + unmarshall(reader, val.learner); + ::dsn::unmarshall(reader, val.signature); + ::dsn::unmarshall(reader, val.last_committed_decree_in_app); + ::dsn::unmarshall(reader, val.last_committed_decree_in_prepare_list); + unmarshall(reader, val.app_specific_learn_request); +} + +inline void marshall(::dsn::binary_writer& writer, const learn_request& val, uint16_t pos = 0xffff) { + marshall(writer, val.gpid, pos); + marshall(writer, val.learner, pos); + ::dsn::marshall(writer, val.signature, pos); + ::dsn::marshall(writer, val.last_committed_decree_in_app, pos); + ::dsn::marshall(writer, val.last_committed_decree_in_prepare_list, pos); + marshall(writer, val.app_specific_learn_request, pos); +} + + /* learn_response: err, a replica_configuration (config), commit_decree, prepare_start_decree, a learn_state (state) and base_local_dir. The default constructor sets err and both decree fields to 0. operator== compares every field. */ +class learn_response { + public: + + learn_response(const learn_response&); + learn_response& operator=(const learn_response&); + learn_response() : err(0), commit_decree(0), prepare_start_decree(0), base_local_dir() { + } + + virtual ~learn_response() throw(); + int32_t err; + replica_configuration config; + int64_t commit_decree; + int64_t prepare_start_decree; + learn_state state; + std::string base_local_dir; + bool operator == (const learn_response & rhs) const + { + if (!(err == rhs.err)) + return false; + if (!(config == rhs.config)) + return false; + if (!(commit_decree == rhs.commit_decree)) + return false; + if (!(prepare_start_decree == rhs.prepare_start_decree)) + return false; + if (!(state == rhs.state)) + return false; + if (!(base_local_dir == rhs.base_local_dir)) + return false; + return true; + } + bool operator != (const learn_response &rhs) const { + return !(*this == rhs); + } + + bool operator < (const learn_response & ) const; + + +}; + +void swap(learn_response &a, learn_response &b); + +inline void unmarshall(::dsn::binary_reader& reader, __out_param learn_response& 
val) { + ::dsn::unmarshall(reader, val.err); + unmarshall(reader, val.config); + ::dsn::unmarshall(reader, val.commit_decree); + ::dsn::unmarshall(reader, val.prepare_start_decree); + unmarshall(reader, val.state); + ::dsn::unmarshall(reader, val.base_local_dir); +} + +inline void marshall(::dsn::binary_writer& writer, const learn_response& val, uint16_t pos = 0xffff) { + ::dsn::marshall(writer, val.err, pos); + marshall(writer, val.config, pos); + ::dsn::marshall(writer, val.commit_decree, pos); + ::dsn::marshall(writer, val.prepare_start_decree, pos); + marshall(writer, val.state, pos); + ::dsn::marshall(writer, val.base_local_dir, pos); +} + + /* group_check_request: app_type, the node end_point, a replica_configuration (config), last_committed_decree and learner_signature. The default constructor sets the two int64 fields to 0. Serialized in declaration order. */ +class group_check_request { + public: + + group_check_request(const group_check_request&); + group_check_request& operator=(const group_check_request&); + group_check_request() : app_type(), last_committed_decree(0), learner_signature(0) { + } + + virtual ~group_check_request() throw(); + std::string app_type; + ::dsn::end_point node; + replica_configuration config; + int64_t last_committed_decree; + int64_t learner_signature; + bool operator == (const group_check_request & rhs) const + { + if (!(app_type == rhs.app_type)) + return false; + if (!(node == rhs.node)) + return false; + if (!(config == rhs.config)) + return false; + if (!(last_committed_decree == rhs.last_committed_decree)) + return false; + if (!(learner_signature == rhs.learner_signature)) + return false; + return true; + } + bool operator != (const group_check_request &rhs) const { + return !(*this == rhs); + } + + bool operator < (const group_check_request & ) const; + + +}; + +void swap(group_check_request &a, group_check_request &b); + +inline void unmarshall(::dsn::binary_reader& reader, __out_param group_check_request& val) { + ::dsn::unmarshall(reader, val.app_type); + unmarshall(reader, val.node); + unmarshall(reader, val.config); + ::dsn::unmarshall(reader, val.last_committed_decree); + ::dsn::unmarshall(reader, val.learner_signature); +} + +inline 
void marshall(::dsn::binary_writer& writer, const group_check_request& val, uint16_t pos = 0xffff) { + ::dsn::marshall(writer, val.app_type, pos); + marshall(writer, val.node, pos); + marshall(writer, val.config, pos); + ::dsn::marshall(writer, val.last_committed_decree, pos); + ::dsn::marshall(writer, val.learner_signature, pos); +} + + +class group_check_response { + public: + + group_check_response(const group_check_response&); + group_check_response& operator=(const group_check_response&); + group_check_response() : err(0), last_committed_decree_in_app(0), last_committed_decree_in_prepare_list(0), learner_status_((learner_status)3), learner_signature(0) { + learner_status_ = (learner_status)3; + + } + + virtual ~group_check_response() throw(); + global_partition_id gpid; + int32_t err; + int64_t last_committed_decree_in_app; + int64_t last_committed_decree_in_prepare_list; + learner_status learner_status_; + int64_t learner_signature; + ::dsn::end_point node; + bool operator == (const group_check_response & rhs) const + { + if (!(gpid == rhs.gpid)) + return false; + if (!(err == rhs.err)) + return false; + if (!(last_committed_decree_in_app == rhs.last_committed_decree_in_app)) + return false; + if (!(last_committed_decree_in_prepare_list == rhs.last_committed_decree_in_prepare_list)) + return false; + if (!(learner_status_ == rhs.learner_status_)) + return false; + if (!(learner_signature == rhs.learner_signature)) + return false; + if (!(node == rhs.node)) + return false; + return true; + } + bool operator != (const group_check_response &rhs) const { + return !(*this == rhs); + } + + bool operator < (const group_check_response & ) const; + + +}; + +void swap(group_check_response &a, group_check_response &b); + +inline void unmarshall(::dsn::binary_reader& reader, __out_param group_check_response& val) { + unmarshall(reader, val.gpid); + ::dsn::unmarshall(reader, val.err); + ::dsn::unmarshall(reader, val.last_committed_decree_in_app); + 
::dsn::unmarshall(reader, val.last_committed_decree_in_prepare_list); + unmarshall(reader, val.learner_status_); + ::dsn::unmarshall(reader, val.learner_signature); + unmarshall(reader, val.node); +} + +inline void marshall(::dsn::binary_writer& writer, const group_check_response& val, uint16_t pos = 0xffff) { + marshall(writer, val.gpid, pos); + ::dsn::marshall(writer, val.err, pos); + ::dsn::marshall(writer, val.last_committed_decree_in_app, pos); + ::dsn::marshall(writer, val.last_committed_decree_in_prepare_list, pos); + marshall(writer, val.learner_status_, pos); + ::dsn::marshall(writer, val.learner_signature, pos); + marshall(writer, val.node, pos); +} + + +class meta_request_header { + public: + + meta_request_header(const meta_request_header&); + meta_request_header& operator=(const meta_request_header&); + meta_request_header() : rpc_tag(0) { + } + + virtual ~meta_request_header() throw(); + int32_t rpc_tag; + bool operator == (const meta_request_header & rhs) const + { + if (!(rpc_tag == rhs.rpc_tag)) + return false; + return true; + } + bool operator != (const meta_request_header &rhs) const { + return !(*this == rhs); + } + + bool operator < (const meta_request_header & ) const; + + +}; + +void swap(meta_request_header &a, meta_request_header &b); + +inline void unmarshall(::dsn::binary_reader& reader, __out_param meta_request_header& val) { + ::dsn::unmarshall(reader, val.rpc_tag); +} + +inline void marshall(::dsn::binary_writer& writer, const meta_request_header& val, uint16_t pos = 0xffff) { + ::dsn::marshall(writer, val.rpc_tag, pos); +} + + +class meta_response_header { + public: + + meta_response_header(const meta_response_header&); + meta_response_header& operator=(const meta_response_header&); + meta_response_header() : err(0) { + } + + virtual ~meta_response_header() throw(); + int32_t err; + ::dsn::end_point primary_address; + bool operator == (const meta_response_header & rhs) const + { + if (!(err == rhs.err)) + return false; + if 
(!(primary_address == rhs.primary_address)) + return false; + return true; + } + bool operator != (const meta_response_header &rhs) const { + return !(*this == rhs); + } + + bool operator < (const meta_response_header & ) const; + + +}; + +void swap(meta_response_header &a, meta_response_header &b); + +inline void unmarshall(::dsn::binary_reader& reader, __out_param meta_response_header& val) { + ::dsn::unmarshall(reader, val.err); + unmarshall(reader, val.primary_address); +} + +inline void marshall(::dsn::binary_writer& writer, const meta_response_header& val, uint16_t pos = 0xffff) { + ::dsn::marshall(writer, val.err, pos); + marshall(writer, val.primary_address, pos); +} + + +class configuration_update_request { + public: + + configuration_update_request(const configuration_update_request&); + configuration_update_request& operator=(const configuration_update_request&); + configuration_update_request() : type((config_type)0) { + type = (config_type)0; + + } + + virtual ~configuration_update_request() throw(); + partition_configuration config; + config_type type; + ::dsn::end_point node; + bool operator == (const configuration_update_request & rhs) const + { + if (!(config == rhs.config)) + return false; + if (!(type == rhs.type)) + return false; + if (!(node == rhs.node)) + return false; + return true; + } + bool operator != (const configuration_update_request &rhs) const { + return !(*this == rhs); + } + + bool operator < (const configuration_update_request & ) const; + + +}; + +void swap(configuration_update_request &a, configuration_update_request &b); + +inline void unmarshall(::dsn::binary_reader& reader, __out_param configuration_update_request& val) { + unmarshall(reader, val.config); + unmarshall(reader, val.type); + unmarshall(reader, val.node); +} + +inline void marshall(::dsn::binary_writer& writer, const configuration_update_request& val, uint16_t pos = 0xffff) { + marshall(writer, val.config, pos); + marshall(writer, val.type, pos); + 
marshall(writer, val.node, pos); +} + + +class configuration_update_response { + public: + + configuration_update_response(const configuration_update_response&); + configuration_update_response& operator=(const configuration_update_response&); + configuration_update_response() : err(0) { + } + + virtual ~configuration_update_response() throw(); + int32_t err; + partition_configuration config; + bool operator == (const configuration_update_response & rhs) const + { + if (!(err == rhs.err)) + return false; + if (!(config == rhs.config)) + return false; + return true; + } + bool operator != (const configuration_update_response &rhs) const { + return !(*this == rhs); + } + + bool operator < (const configuration_update_response & ) const; + + +}; + +void swap(configuration_update_response &a, configuration_update_response &b); + +inline void unmarshall(::dsn::binary_reader& reader, __out_param configuration_update_response& val) { + ::dsn::unmarshall(reader, val.err); + unmarshall(reader, val.config); +} + +inline void marshall(::dsn::binary_writer& writer, const configuration_update_response& val, uint16_t pos = 0xffff) { + ::dsn::marshall(writer, val.err, pos); + marshall(writer, val.config, pos); +} + + +class configuration_proposal_request { + public: + + configuration_proposal_request(const configuration_proposal_request&); + configuration_proposal_request& operator=(const configuration_proposal_request&); + configuration_proposal_request() : type((config_type)0), is_clean_data(false), is_upgrade(false) { + type = (config_type)0; + + } + + virtual ~configuration_proposal_request() throw(); + partition_configuration config; + config_type type; + ::dsn::end_point node; + bool is_clean_data; + bool is_upgrade; + bool operator == (const configuration_proposal_request & rhs) const + { + if (!(config == rhs.config)) + return false; + if (!(type == rhs.type)) + return false; + if (!(node == rhs.node)) + return false; + if (!(is_clean_data == rhs.is_clean_data)) + return 
false; + if (!(is_upgrade == rhs.is_upgrade)) + return false; + return true; + } + bool operator != (const configuration_proposal_request &rhs) const { + return !(*this == rhs); + } + + bool operator < (const configuration_proposal_request & ) const; + + +}; + +void swap(configuration_proposal_request &a, configuration_proposal_request &b); + +inline void unmarshall(::dsn::binary_reader& reader, __out_param configuration_proposal_request& val) { + unmarshall(reader, val.config); + unmarshall(reader, val.type); + unmarshall(reader, val.node); + ::dsn::unmarshall(reader, val.is_clean_data); + ::dsn::unmarshall(reader, val.is_upgrade); +} + +inline void marshall(::dsn::binary_writer& writer, const configuration_proposal_request& val, uint16_t pos = 0xffff) { + marshall(writer, val.config, pos); + marshall(writer, val.type, pos); + marshall(writer, val.node, pos); + ::dsn::marshall(writer, val.is_clean_data, pos); + ::dsn::marshall(writer, val.is_upgrade, pos); +} + + +class configuration_query_by_node_request { + public: + + configuration_query_by_node_request(const configuration_query_by_node_request&); + configuration_query_by_node_request& operator=(const configuration_query_by_node_request&); + configuration_query_by_node_request() { + } + + virtual ~configuration_query_by_node_request() throw(); + ::dsn::end_point node; + bool operator == (const configuration_query_by_node_request & rhs) const + { + if (!(node == rhs.node)) + return false; + return true; + } + bool operator != (const configuration_query_by_node_request &rhs) const { + return !(*this == rhs); + } + + bool operator < (const configuration_query_by_node_request & ) const; + + +}; + +void swap(configuration_query_by_node_request &a, configuration_query_by_node_request &b); + +inline void unmarshall(::dsn::binary_reader& reader, __out_param configuration_query_by_node_request& val) { + unmarshall(reader, val.node); +} + +inline void marshall(::dsn::binary_writer& writer, const 
configuration_query_by_node_request& val, uint16_t pos = 0xffff) { + marshall(writer, val.node, pos); +} + + +class configuration_query_by_node_response { + public: + + configuration_query_by_node_response(const configuration_query_by_node_response&); + configuration_query_by_node_response& operator=(const configuration_query_by_node_response&); + configuration_query_by_node_response() : err(0) { + } + + virtual ~configuration_query_by_node_response() throw(); + int32_t err; + std::vector partitions; + bool operator == (const configuration_query_by_node_response & rhs) const + { + if (!(err == rhs.err)) + return false; + if (!(partitions == rhs.partitions)) + return false; + return true; + } + bool operator != (const configuration_query_by_node_response &rhs) const { + return !(*this == rhs); + } + + bool operator < (const configuration_query_by_node_response & ) const; + + +}; + +void swap(configuration_query_by_node_response &a, configuration_query_by_node_response &b); + +inline void unmarshall(::dsn::binary_reader& reader, __out_param configuration_query_by_node_response& val) { + ::dsn::unmarshall(reader, val.err); + unmarshall(reader, val.partitions); +} + +inline void marshall(::dsn::binary_writer& writer, const configuration_query_by_node_response& val, uint16_t pos = 0xffff) { + ::dsn::marshall(writer, val.err, pos); + marshall(writer, val.partitions, pos); +} + + +class configuration_query_by_index_request { + public: + + configuration_query_by_index_request(const configuration_query_by_index_request&); + configuration_query_by_index_request& operator=(const configuration_query_by_index_request&); + configuration_query_by_index_request() : app_name() { + } + + virtual ~configuration_query_by_index_request() throw(); + std::string app_name; + std::vector partition_indices; + bool operator == (const configuration_query_by_index_request & rhs) const + { + if (!(app_name == rhs.app_name)) + return false; + if (!(partition_indices == rhs.partition_indices)) + 
return false; + return true; + } + bool operator != (const configuration_query_by_index_request &rhs) const { + return !(*this == rhs); + } + + bool operator < (const configuration_query_by_index_request & ) const; + + +}; + +void swap(configuration_query_by_index_request &a, configuration_query_by_index_request &b); + +inline void unmarshall(::dsn::binary_reader& reader, __out_param configuration_query_by_index_request& val) { + ::dsn::unmarshall(reader, val.app_name); + unmarshall(reader, val.partition_indices); +} + +inline void marshall(::dsn::binary_writer& writer, const configuration_query_by_index_request& val, uint16_t pos = 0xffff) { + ::dsn::marshall(writer, val.app_name, pos); + marshall(writer, val.partition_indices, pos); +} + + +class configuration_query_by_index_response { + public: + + configuration_query_by_index_response(const configuration_query_by_index_response&); + configuration_query_by_index_response& operator=(const configuration_query_by_index_response&); + configuration_query_by_index_response() : err(0) { + } + + virtual ~configuration_query_by_index_response() throw(); + int32_t err; + std::vector partitions; + bool operator == (const configuration_query_by_index_response & rhs) const + { + if (!(err == rhs.err)) + return false; + if (!(partitions == rhs.partitions)) + return false; + return true; + } + bool operator != (const configuration_query_by_index_response &rhs) const { + return !(*this == rhs); + } + + bool operator < (const configuration_query_by_index_response & ) const; + + +}; + +void swap(configuration_query_by_index_response &a, configuration_query_by_index_response &b); + +inline void unmarshall(::dsn::binary_reader& reader, __out_param configuration_query_by_index_response& val) { + ::dsn::unmarshall(reader, val.err); + unmarshall(reader, val.partitions); +} + +inline void marshall(::dsn::binary_writer& writer, const configuration_query_by_index_response& val, uint16_t pos = 0xffff) { + ::dsn::marshall(writer, val.err, 
pos); + marshall(writer, val.partitions, pos); +} + + +class query_replica_decree_request { + public: + + query_replica_decree_request(const query_replica_decree_request&); + query_replica_decree_request& operator=(const query_replica_decree_request&); + query_replica_decree_request() { + } + + virtual ~query_replica_decree_request() throw(); + global_partition_id gpid; + ::dsn::end_point node; + bool operator == (const query_replica_decree_request & rhs) const + { + if (!(gpid == rhs.gpid)) + return false; + if (!(node == rhs.node)) + return false; + return true; + } + bool operator != (const query_replica_decree_request &rhs) const { + return !(*this == rhs); + } + + bool operator < (const query_replica_decree_request & ) const; + + +}; + +void swap(query_replica_decree_request &a, query_replica_decree_request &b); + +inline void unmarshall(::dsn::binary_reader& reader, __out_param query_replica_decree_request& val) { + unmarshall(reader, val.gpid); + unmarshall(reader, val.node); +} + +inline void marshall(::dsn::binary_writer& writer, const query_replica_decree_request& val, uint16_t pos = 0xffff) { + marshall(writer, val.gpid, pos); + marshall(writer, val.node, pos); +} + + +class query_replica_decree_response { + public: + + query_replica_decree_response(const query_replica_decree_response&); + query_replica_decree_response& operator=(const query_replica_decree_response&); + query_replica_decree_response() : err(0), last_decree(0) { + } + + virtual ~query_replica_decree_response() throw(); + int32_t err; + int64_t last_decree; + bool operator == (const query_replica_decree_response & rhs) const + { + if (!(err == rhs.err)) + return false; + if (!(last_decree == rhs.last_decree)) + return false; + return true; + } + bool operator != (const query_replica_decree_response &rhs) const { + return !(*this == rhs); + } + + bool operator < (const query_replica_decree_response & ) const; + + +}; + +void swap(query_replica_decree_response &a, 
query_replica_decree_response &b); + +inline void unmarshall(::dsn::binary_reader& reader, __out_param query_replica_decree_response& val) { + ::dsn::unmarshall(reader, val.err); + ::dsn::unmarshall(reader, val.last_decree); +} + +inline void marshall(::dsn::binary_writer& writer, const query_replica_decree_response& val, uint16_t pos = 0xffff) { + ::dsn::marshall(writer, val.err, pos); + ::dsn::marshall(writer, val.last_decree, pos); +} + +}} // namespace + +#endif diff --git a/include/dsn/gproto_helper.h b/include/dsn/gproto_helper.h new file mode 100644 index 0000000000..b0fbb048bb --- /dev/null +++ b/include/dsn/gproto_helper.h @@ -0,0 +1,117 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +# pragma once + +# include +# include + +# include +# include +# include + +namespace dsn { + + class gproto_binary_reader : public ::google::protobuf::io::ZeroCopyInputStream + { + public: + gproto_binary_reader(binary_reader& reader) + : _reader(reader) + { + } + + virtual bool Next(const void** data, int* size) + { + return _reader.next(data, size); + } + + virtual void BackUp(int count) + { + _reader.backup(count); + } + + virtual bool Skip(int count) + { + return _reader.skip(count); + } + + virtual int64_t ByteCount() const + { + return static_cast(_reader.total_size()); + } + + private: + binary_reader& _reader; + }; + + class gproto_binary_writer : public ::google::protobuf::io::ZeroCopyOutputStream + { + public: + gproto_binary_writer(binary_writer& writer) + : _writer(writer) + { + } + + virtual bool Next(void** data, int* size) + { + return _writer.next(data, size); + } + + virtual void BackUp(int count) + { + _writer.backup(count); + } + + virtual int64_t ByteCount() const + { + return static_cast(_writer.total_size()); + } + + virtual bool WriteAliasedRaw(const void* data, int size) + { + return false; + } + + private: + binary_writer& _writer; + }; + + template + void marshall(binary_writer& writer, const T& val) + { + gproto_binary_writer wt2(writer); + ::google::protobuf::io::CodedOutputStream os(&wt2); + val.SerializeWithCachedSizes(&os); + } + + template + void unmarshall(binary_reader& reader, __out_param T& val) + { + gproto_binary_reader rd2(reader); + ::google::protobuf::io::CodedInputStream is(&rd2); + val.MergePartialFromCodedStream(&is); + } + +} diff --git a/include/dsn/internal/admission_controller.h b/include/dsn/internal/admission_controller.h new file mode 100644 index 0000000000..8042fef1fa --- /dev/null +++ b/include/dsn/internal/admission_controller.h @@ -0,0 +1,59 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is 
hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +# pragma once + +# include +# include + +namespace dsn { + +class admission_controller +{ +public: + template static admission_controller* create(task_queue* q, const char* args); + +public: + admission_controller(task_queue* q, std::vector& sargs) : _queue(q) {} + ~admission_controller(void) {} + + virtual bool is_task_accepted(task_ptr& task) = 0; + + task_queue* bound_queue() const { return _queue; } + +private: + task_queue* _queue; +}; + +// ----------------- inline implementation ----------------- +template +admission_controller* admission_controller::create(task_queue* q, const char* args) +{ + std::vector sargs; + dsn::utils::split_args(args, sargs, ' '); + return new T(q, sargs); +} + +} // end namespace diff --git a/include/dsn/internal/aio_provider.h b/include/dsn/internal/aio_provider.h new file mode 100644 index 0000000000..aabc5a154c --- /dev/null +++ b/include/dsn/internal/aio_provider.h @@ -0,0 +1,62 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# pragma once + +# include + +namespace dsn { + +class disk_engine; +class service_node; + +class aio_provider +{ +public: + template static aio_provider* create(disk_engine* disk, aio_provider* inner_provider) + { + return new T(disk, inner_provider); + } + +public: + aio_provider(disk_engine* disk, aio_provider* inner_provider); + service_node* node() const; + + virtual handle_t open(const char* file_name, int flag, int pmode) = 0; + virtual error_code close(handle_t hFile) = 0; + virtual void aio(aio_task_ptr& aio) = 0; + virtual disk_aio_ptr prepare_aio_context(aio_task*) = 0; + +protected: + void complete_io(aio_task_ptr& aio, error_code err, uint32_t bytes, int delay_milliseconds = 0); + +private: + disk_engine *_engine; +}; + + +} // end namespace + + diff --git a/include/dsn/internal/command.h b/include/dsn/internal/command.h new file mode 100644 index 0000000000..e4415e82bf --- /dev/null +++ b/include/dsn/internal/command.h @@ -0,0 +1,28 @@ +#pragma once + +# include +# include +# include + +namespace dsn { + + typedef std::function&)> command_handler; + + void register_command( + const std::vector& commands, // commands, e.g., {"help", "Help", "HELP", "h", "H"} + const char* help, // help info for users + command_handler handler + ); + + void register_command( + const char** commands, // commands, e.g., {"help", "Help", nullptr} + const char* help, // help info for users + command_handler handler + ); + + void register_command( + const char* command, // commands, e.g., "help" + const char* help, // help info for users + command_handler handler + ); +} diff --git a/include/dsn/internal/configuration.h b/include/dsn/internal/configuration.h new file mode 100644 index 
0000000000..ac1dfc0ccc --- /dev/null +++ b/include/dsn/internal/configuration.h @@ -0,0 +1,211 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */
+# pragma once
+
+# include
+# include
+# include
+# include
+# include
+# include
+# include
+/* NOTE(review): the include targets above and several template argument lists
+ * below (bare "template", "std::shared_ptr", "std::vector", "std::map") look
+ * like they were dropped from this listing - TODO confirm against upstream. */
+
+namespace dsn {
+/* configuration_ptr is the shared handle type for a configuration, and
+ * config_file_change_notifier is a plain function pointer receiving one. */
+class configuration;
+typedef std::shared_ptr configuration_ptr;
+typedef void (*config_file_change_notifier)(configuration_ptr);
+/*
+ * Lookup interface over values addressed by a (section, key) pair.
+ * An instance is constructed from a file name; the loading/parsing code is
+ * not part of this header. Typed reads go through get_value(), whose explicit
+ * specialisations are defined inline after the class.
+ */
+class configuration
+{
+public: // create() returns "new T(file_name)"; nothing in this header frees it
+    template static configuration* create(const char* file_name)
+    {
+        return new T(file_name);
+    }
+
+public:
+    configuration(const char* file_name);
+
+    ~configuration(void);
+    /* The two functions below take a non-const vector reference, presumably
+     * filled with the section / key names - implementation not shown here. */
+    void get_all_sections(std::vector& sections);
+
+    void get_all_keys(const char* section, std::vector& keys);
+
+    std::string get_string_value(const char* section, const char* key, const char* default_value);
+    /* presumably the stored string split on `splitter` - TODO confirm */
+    std::list get_string_value_list(const char* section, const char* key, char splitter);
+
+    void register_config_change_notification(config_file_change_notifier notifier);
+    /* warn_if_not defaults to true; presumably it controls a warning for a
+     * missing section - TODO confirm in the implementation */
+    bool has_section(const char* section, bool warn_if_not = true);
+
+    bool has_key(const char* section, const char* key);
+    /* Returns the C string of _file_name; the pointer stays owned by that member. */
+    const char* get_file_name() const { return _file_name.c_str(); }
+
+    // ---------------------- common routines ----------------------------------
+    /* Typed read of "[section] key" with a fallback default. Only explicit
+     * specialisations are defined (inline, after the class). */
+    template T get_value(const char* section, const char* key, T default_value);
+    /* get_string_value_internal() backs the get_value specialisations: they treat
+     * a false return as "key not defined" and read the text from `ov` otherwise. */
+private:
+    bool get_string_value_internal(const char* section, const char* key, const char* default_value, std::string& ov);
+
+private:
+    struct conf // one stored entry; field meanings are inferred from their names only
+    {
+        const char* section;
+        const char* key;
+        const char* value;
+        int line;
+    };
+    /* Data members; how they are populated is not visible in this header. */
+    typedef std::map> config_map;
+    std::string _file_name;
+    config_map _configs;
+    std::list _notifiers;
+    std::shared_ptr _file_data;
+};
+/* double specialisation: when get_string_value_internal() returns false it prints
+ * a warning with printf and returns default_value; otherwise the text is
+ * converted with atof(). */
+template<> inline double configuration::get_value(const char* section, const char* key, double default_value)
+{
+    std::string value;
+    if (!get_string_value_internal(section, key, "", value))
+    {
+        printf("WARNING: configuration '[%s] %s' is not defined, default value is '%lf'\n",
+            section,
+            key,
+            default_value
+            );
+
+        return default_value;
+    }
+    else
+    {
+        return atof(value.c_str());
+    }
+}
+
+
+template<> inline long long configuration::get_value(const char* section, const char* key, long long default_value) +{ + std::string value; + if (!get_string_value_internal(section, key, "", value)) + { + printf("WARNING: configuration '[%s] %s' is not defined, default value is '%lld'\n", + section, + key, + default_value + ); + + return default_value; + } + else + { + if (value.length() > 2 && (value.substr(0, 2) == "0x" || value.substr(0, 2) == "0X")) + { + long long unsigned int v; + sscanf(value.c_str(), "0x%llx", &v); + return v; + } + else + return (long long)(atol(value.c_str())); + } +} + +template<> inline long configuration::get_value(const char* section, const char* key, long default_value) +{ + std::string value; + if (!get_string_value_internal(section, key, "", value)) + { + printf("WARNING: configuration '[%s] %s' is not defined, default value is '%ld'\n", + section, + key, + default_value + ); + + return default_value; + } + else + { + if (value.length() > 2 && (value.substr(0, 2) == "0x" || value.substr(0, 2) == "0X")) + { + int v; + sscanf(value.c_str(), "0x%x", &v); + return v; + } + else + return (long)(atoi(value.c_str())); + } +} + +template<> inline int configuration::get_value(const char* section, const char* key, int default_value) +{ + return static_cast(get_value(section, key, default_value)); +} + +template<> inline unsigned int configuration::get_value(const char* section, const char* key, unsigned int default_value) +{ + return (unsigned int)(get_value(section, key, default_value)); +} + +template<> inline short configuration::get_value(const char* section, const char* key, short default_value) +{ + return (short)(get_value(section, key, default_value)); +} + +template<> inline unsigned short configuration::get_value(const char* section, const char* key, unsigned short default_value) +{ + return (unsigned short)(get_value(section, key, default_value)); +} + +template<> inline bool configuration::get_value(const char* section, const 
char* key, bool default_value) +{ + std::string value; + if (!get_string_value_internal(section, key, "", value)) + { + printf("WARNING: configuration '[%s] %s' is not defined, default value is '%s'\n", + section, + key, + default_value ? "true" : "false" + ); + + return default_value; + } + else if (strcmp(value.c_str(), "true") == 0 || strcmp(value.c_str(), "TRUE") == 0) + { + return true; + } + else + { + return false; + } +} + + +} // end namespace diff --git a/include/dsn/internal/coredump.h b/include/dsn/internal/coredump.h new file mode 100644 index 0000000000..b13da22257 --- /dev/null +++ b/include/dsn/internal/coredump.h @@ -0,0 +1,42 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#pragma once + +# include + +namespace dsn { + namespace utils { + + class coredump // static-only interface; both members are only declared in this header + { + public: + static void init(const char* dump_dir); + static void write(); + }; + } +} + + diff --git a/include/dsn/internal/customizable_id.h b/include/dsn/internal/customizable_id.h new file mode 100644 index 0000000000..ce792ed775 --- /dev/null +++ b/include/dsn/internal/customizable_id.h @@ -0,0 +1,186 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# pragma once + +# include +# include +# include +# include +# include + +namespace dsn { namespace utils { + +#define DEFINE_CUSTOMIZED_ID(T, name) __selectany const T name(#name); +#define DEFINE_CUSTOMIZED_ID_LONG(T, name, ...) 
__selectany const T name(#name, __VA_ARGS__); + +#define DEFINE_CUSTOMIZED_ID_TYPE(T) struct T##_{}; \ + typedef dsn::utils::customized_id T; + +template +class customized_id_mgr : public dsn::utils::singleton> +{ +public: + int get_id(const char* name) const; // -1 when name has not been registered + const char* get_name(int id) const; // "unknown" when id is outside the registered range + int register_id(const char* name); // returns the existing id when name is already registered + int max_value() const { return static_cast(_names2.size()) - 1; } + +private: + std::map _names; // name -> id + std::vector _names2; // id -> name (an id is the index at which the name was appended) +}; + +template +struct customized_id +{ + customized_id(const char* name); // registers name (or reuses its id) through assign() + customized_id(const customized_id& source); + operator int() const; + const char* to_string() const; + void reset(const customized_id& r); + + static int max_value(); + static const char* to_string(int code); + static bool is_exist(const char* name); + static customized_id from_string(const char* name, customized_id invalid_value); // invalid_value is returned for unknown names + +protected: + static int assign(const char* xxx); + customized_id(int code); // wraps an existing code without registering anything + +protected: + int _internal_code; + +//private: +// // no assignment operator +// customized_id& operator=(const customized_id& source); +}; + +// -------------------------- inline implementation ---------------------------- + +template +customized_id::customized_id(const char* name) + : _internal_code(assign(name)) +{ +} + +template +customized_id::customized_id(const customized_id& source) + : _internal_code(source._internal_code) +{ +} + +template +customized_id::operator int() const +{ + return _internal_code; +} + +template +const char* customized_id::to_string() const +{ + return customized_id_mgr::instance().get_name(_internal_code); +} + +template +void 
customized_id::reset(const customized_id& r) +{ + _internal_code = r._internal_code; +} + +template +int customized_id::max_value() +{ + return customized_id_mgr::instance().max_value(); +} + +template +const char* customized_id::to_string(int code) +{ + return customized_id_mgr::instance().get_name(code); +} + +template +bool customized_id::is_exist(const char* name) +{ + 
return customized_id_mgr::instance().get_id(name) != -1; +} + +template +customized_id customized_id::from_string(const char* name, customized_id invalid_value) +{ + int id = customized_id_mgr::instance().get_id(name); + if (id == -1) return invalid_value; + else return customized_id(id); +} + +template +int customized_id::assign(const char* name) +{ + return customized_id_mgr::instance().register_id(name); +} + +template +customized_id::customized_id(int code) + : _internal_code(code) +{ +} + +template +int customized_id_mgr::get_id(const char* name) const +{ + auto it = _names.find(std::string(name)); + if (it == _names.end()) + return -1; + else + return it->second; +} + +template +const char* customized_id_mgr::get_name(int id) const +{ + if (id >= 0 && id < static_cast(_names2.size())) // also reject negative ids such as the -1 "not found" value + return _names2[id].c_str(); + else + return "unknown"; +} + +template +int customized_id_mgr::register_id(const char* name) +{ + int id = get_id(name); + if (-1 != id) + { + return id; + } + + int code = static_cast(_names.size()); // next free id == number of names registered so far + _names[std::string(name)] = code; + _names2.push_back(std::string(name)); + return code; +} + +}} // end namespace dsn::utils diff --git a/include/dsn/internal/dsn_types.h b/include/dsn/internal/dsn_types.h new file mode 100644 index 0000000000..0e02ed0871 --- /dev/null +++ b/include/dsn/internal/dsn_types.h @@ -0,0 +1,155 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the 
Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this 
permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# pragma once + +#if defined(_WIN32) + +# define _WINSOCK_DEPRECATED_NO_WARNINGS 1 + +# include +# include +# include +# pragma comment(lib, "ws2_32.lib") + +__pragma(warning(disable:4127)) + +#define __thread __declspec(thread) +#define __selectany __declspec(selectany) extern +typedef HANDLE handle_t; + +#elif defined(__linux__) + +#define __selectany __attribute__((weak)) extern +typedef int handle_t; + +#elif defined(__MACH__) + +#define __selectany __attribute__((weak)) extern +typedef int handle_t; + +#else + +#error "unsupported platform" +#endif + +# ifndef TIME_MS_MAX +# define TIME_MS_MAX 0x0FFFFFFF +# endif + +# ifndef FIELD_OFFSET +# define FIELD_OFFSET(s, field) ((size_t)&((s *)(0))->field) +# endif + +# ifndef CONTAINING_RECORD +# define CONTAINING_RECORD(address, type, field) \ + ((type *)((PCHAR)(address)-(void*)(&((type *)0)->field))) +# endif + +# ifndef MAX_COMPUTERNAME_LENGTH +# define MAX_COMPUTERNAME_LENGTH 32 +# endif + +# ifndef __in_param +# define __in_param +# endif + +# ifndef __out_param +# define __out_param +# endif + +# ifndef __inout_param +# define __inout_param +# endif + +// stl headers +# include +# include +# include +# include +# include +# include + +// common c headers +# include +# include +# include +# include // for file open flags + +// common utilities +# include +# include +# include + +// common types +namespace dsn +{ + class 
ref_object + { + public: + ref_object() { ref_counter = 0; } + virtual ~ref_object() {} + std::atomic ref_counter; + + void add_ref() + { + ++ref_counter; + } + + void release_ref() + { + if (--ref_counter == 0) + delete this; + } + }; + +#define DEFINE_REF_OBJECT(T) \ + static void intrusive_ptr_add_ref(T* obj) \ + { \ + ++obj->ref_counter; \ + } \ + static void intrusive_ptr_release(T* obj) \ + { \ + if (--obj->ref_counter == 0) \ + delete obj; \ + } + + class task; + class message; + class rpc_client_session; + class rpc_server_session; + + typedef ::boost::intrusive_ptr task_ptr; + typedef ::boost::intrusive_ptr message_ptr; + typedef ::boost::intrusive_ptr rpc_client_session_ptr; + typedef ::boost::intrusive_ptr rpc_server_session_ptr; + + #define TOOL_TYPE_MAIN 0 + #define TOOL_TYPE_ASPECT 1 + + #define PROVIDER_TYPE_MAIN 0 + #define PROVIDER_TYPE_ASPECT 1 + +} // end namespace dsn diff --git a/include/dsn/internal/end_point.h b/include/dsn/internal/end_point.h new file mode 100644 index 0000000000..4c8c289a02 --- /dev/null +++ b/include/dsn/internal/end_point.h @@ -0,0 +1,132 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# pragma once + +# include +# include + +namespace dsn { + +#define MAX_NODE_NAME_LENGTH MAX_COMPUTERNAME_LENGTH + +struct end_point +{ + uint32_t ip; // network order + uint16_t port; + std::string name; + + end_point() + { + ip = 0; + port = 0; + } + + end_point(uint32_t ip, uint16_t port, const char* n = "simulation") + : name(n) + { + this->ip = ip; // the parameters shadow the members, so qualify with this-> + this->port = port; + } + + end_point(const end_point& source) + { + ip = source.ip; + port = source.port; + name = source.name; + } + + end_point(const char* str, uint16_t port); + + bool operator == (const end_point& r) const // identity is (ip, port); name is not compared + { + return ip == r.ip && port == r.port; + } + + bool operator < (const end_point& r) const // orders by ip first, then port + { + return (ip < r.ip) || (ip == r.ip && port < r.port); + } + + bool operator != (const end_point& r) const + { + return !(*this == r); + } + + std::string to_ip_string(bool dotted = true) const + { + char buffer[32]; + if (dotted) + { + sprintf(buffer, "%u.%u.%u.%u", // the lowest byte of ip is printed first + ip & 0x000000ff, + (ip & 0x0000ff00) >> 8, + (ip & 0x00ff0000) >> 16, + (ip & 0xff000000) >> 24 + ); + } + else + { + sprintf(buffer, "%u", ip); + } + return buffer; + } + + std::string to_port_string(uint16_t addMore = 0) const // formats port + addMore as decimal + { + char buffer[16]; + sprintf(buffer, "%u", (uint32_t)(port + addMore)); + return buffer; + } + + bool 
operator()(const end_point& r) const // orders by port first, then ip (operator< uses the opposite priority) + { + return port < r.port || (port == r.port && ip < r.ip); + } + + static const end_point INVALID; +}; + +#ifndef ZION_NOT_USE_DEFAULT_SERIALIZATION + +inline void 
unmarshall(::dsn::binary_reader& reader, __out_param end_point& val) +{ + reader.read_pod(val.ip); // fields are read in the same order marshall() writes them + reader.read_pod(val.port); + reader.read(val.name); +} + +inline void marshall(::dsn::binary_writer& writer, const end_point& val, uint16_t pos = 0xffff) +{ + writer.write_pod(val.ip, pos); + writer.write_pod(val.port, pos); + writer.write(val.name, pos); +} + +#endif + +} // end namespace + + diff --git a/include/dsn/internal/enum_helper.h b/include/dsn/internal/enum_helper.h new file mode 100644 index 0000000000..d3a774be10 --- /dev/null +++ b/include/dsn/internal/enum_helper.h @@ -0,0 +1,117 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +# pragma once + +# include +# include +# include + +namespace dsn { + +// an invalid enum value must be provided so as to be the default value when parsing failed +#define ENUM_BEGIN2(type, name, invalid_value) \ + static inline enum_helper_xxx* RegisterEnu_##name() {\ + enum_helper_xxx* helper = new enum_helper_xxx(invalid_value); + +#define ENUM_BEGIN(type, invalid_value) ENUM_BEGIN2(type, type, invalid_value) + +#define ENUM_REG(e) helper->register_enum(#e, e); + +#define ENUM_END2(type, name) return helper; \ + } \ + inline type enum_from_string(const char* s, type invalid_value) {\ + return enum_helper_xxx::instance(RegisterEnu_##name).parse(s); \ + }\ + inline const char* enum_to_string(type val) {\ + return enum_helper_xxx::instance(RegisterEnu_##name).to_string(val); \ + } + +#define ENUM_END(type) ENUM_END2(type, type) + +template +class enum_helper_xxx +{ +private: + struct EnumContext + { + std::string name; + }; + +public: + enum_helper_xxx(TEnum invalid) : _invalid(invalid) {} + + void register_enum(const char* name, TEnum v) // records the mapping in both directions + { + _nameToValue[std::string(name)] = v; + + EnumContext ctx; + ctx.name.assign(name); + _valueToContext[v] = ctx; + } + + TEnum parse(const std::string& name) // unknown names yield the invalid value given at construction + { + auto it = _nameToValue.find(name); + return it != _nameToValue.end() ? 
it->second : _invalid; + } + + const char* to_string(TEnum v) // unregistered values yield "Unknown" + { + auto it = _valueToContext.find(v); + if (it != _valueToContext.end()) + { + return it->second.name.c_str(); + } + else + { + return "Unknown"; + } + } + + static enum_helper_xxx& instance(enum_helper_xxx* (*registor)()) + { + // always go through call_once: an unsynchronized pre-check of _instance would race with its initialization + { + static std::once_flag flag; + std::call_once(flag, [&]() { + _instance = registor(); + }); + } + return *_instance; + } + +private: + static enum_helper_xxx *_instance; // created on first instance() call and never deleted in this header + +private: + TEnum _invalid; + std::map _valueToContext; + std::map _nameToValue; +}; + +template enum_helper_xxx* enum_helper_xxx::_instance = 0; + +} // end namespace diff --git a/include/dsn/internal/env_provider.h b/include/dsn/internal/env_provider.h new file mode 100644 index 0000000000..7a3c4e0d43 --- /dev/null +++ b/include/dsn/internal/env_provider.h @@ -0,0 +1,51 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# pragma once + +# include + +namespace dsn { + +class env_provider +{ +public: + template static env_provider* create(env_provider* inner_provider) // factory: heap-allocates a T and returns it as the base type + { + return new T(inner_provider); + } + +public: + env_provider(env_provider* inner_provider); + virtual ~env_provider() {} // objects from create() are held as env_provider*, so deleting through the base must be well-defined + virtual uint64_t now_ns() const { return get_current_physical_time_ns(); } + + virtual uint64_t random64(uint64_t min, uint64_t max); + +public: + static uint64_t get_current_physical_time_ns(); +}; + +} // end namespace diff --git a/include/dsn/internal/error_code.h b/include/dsn/internal/error_code.h new file mode 100644 index 0000000000..f97c56fc3e --- /dev/null +++ b/include/dsn/internal/error_code.h @@ -0,0 +1,100 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# pragma once + +#include +#include + +namespace dsn { + +struct error_code : public dsn::utils::customized_id +{ + error_code(const char* name) : dsn::utils::customized_id(name) + { + dassert (name, "name for an error code cannot be empty"); // NOTE(review): runs after the base constructor has already received name + _used = false; + } + + error_code() : dsn::utils::customized_id(0) // default-constructed codes wrap code 0 and start out marked as used + { + _used = true; + } + + error_code(const error_code& err) : dsn::utils::customized_id(err) + { + _used = false; + } + + error_code& operator=(const error_code& source) + { + _internal_code = source.get(); // get() marks source as used + _used = false; + return *this; + } + + ~error_code() + { + //assert (_used, "error code is not handled"); + } + + int get() const { _used = true; return operator int(); } // reading the code marks it as used + + void set(int err) { _internal_code = err; _used = false; } + +private: + mutable bool _used; // mutable so that the const get() can set it; only referenced by the commented-out check in the destructor +}; + +#define DEFINE_ERR_CODE(x) __selectany const ::dsn::error_code x(#x); + +DEFINE_ERR_CODE(ERR_SUCCESS) +DEFINE_ERR_CODE(ERR_SERVICE_NOT_FOUND) +DEFINE_ERR_CODE(ERR_SERVICE_ALREADY_RUNNING) +DEFINE_ERR_CODE(ERR_IO_PENDING) +DEFINE_ERR_CODE(ERR_TIMEOUT) +DEFINE_ERR_CODE(ERR_SERVICE_NOT_ACTIVE) +DEFINE_ERR_CODE(ERR_BUSY) +DEFINE_ERR_CODE(ERR_NETWORK_INIT_FALED) +DEFINE_ERR_CODE(ERR_TALK_TO_OTHERS) +DEFINE_ERR_CODE(ERR_OBJECT_NOT_FOUND) +DEFINE_ERR_CODE(ERR_HANDLER_NOT_FOUND) +DEFINE_ERR_CODE(ERR_LEARN_FILE_FALED) +DEFINE_ERR_CODE(ERR_INVALID_VERSION) +DEFINE_ERR_CODE(ERR_INVALID_PARAMETERS) 
+DEFINE_ERR_CODE(ERR_CAPACITY_EXCEEDED) +DEFINE_ERR_CODE(ERR_INVALID_STATE) +DEFINE_ERR_CODE(ERR_NOT_ENOUGH_MEMBER) +DEFINE_ERR_CODE(ERR_FILE_OPERATION_FAILED) +DEFINE_ERR_CODE(ERR_HANDLE_EOF) +DEFINE_ERR_CODE(ERR_WRONG_CHECKSUM) +DEFINE_ERR_CODE(ERR_INVALID_DATA) +DEFINE_ERR_CODE(ERR_VERSION_OUTDATED) 
+DEFINE_ERR_CODE(ERR_PATH_NOT_FOUND) +DEFINE_ERR_CODE(ERR_PATH_ALREADY_EXIST) +DEFINE_ERR_CODE(ERR_ADDRESS_ALREADY_USED) + +} // end namespace + + diff --git a/include/dsn/internal/extensible_object.h b/include/dsn/internal/extensible_object.h new file mode 100644 index 0000000000..732ae8a1aa --- /dev/null +++ b/include/dsn/internal/extensible_object.h @@ -0,0 +1,223 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +# pragma once + +# include +# include +# include +# include + +namespace dsn { + +typedef void (*extension_deletor)(void*); + +class extensible // non-owning view over an array of 64-bit extension slots +{ +public: + extensible(uint64_t* ptr, uint32_t count) + { + _ptr = ptr; + _count = count; + } + + void set_extension(uint32_t id, uint64_t data) + { + dbg_dassert(id < _count); + _ptr[id] = data; + } + + uint64_t& get_extension(uint32_t id) // returns a reference so callers can update the slot in place + { + dbg_dassert(id < _count); + return _ptr[id]; + } + +private: + uint64_t *_ptr; + uint32_t _count; +}; + +template +class extensible_object : public extensible +{ +public: + static const uint32_t INVALID_SLOT = 0xffffffff; + static const uint64_t INVALID_VALUE = 0x0ULL; + +public: + extensible_object() + : extensible(_extensions, MAX_EXTENSION_COUNT) + { + memset((void*)_extensions, 0, sizeof(_extensions)); // every slot starts as INVALID_VALUE + } + + ~extensible_object() + { + int maxId = static_cast(get_extension_count()); + + for (int i = 0; i < maxId; i++) + { + if (_extensions[i] != extensible_object::INVALID_VALUE && s_extensionDeletors[i] != nullptr) // only slots registered with a deletor are freed + { + s_extensionDeletors[i]((void*)_extensions[i]); + } + } + } + + void move_to(extensible_object& r) + { + int maxId = static_cast(get_extension_count()); + + for (int i = 0; i < maxId; i++) + { + r._extensions[i] = _extensions[i]; + if (s_extensionDeletors[i] != nullptr) // slots with a deletor are cleared here so only r frees them + { + _extensions[i] = extensible_object::INVALID_VALUE; + } + } + } + + static uint32_t register_extension(extension_deletor deletor = nullptr) + { + int idx = s_nextExtensionIndex++; + if (idx < MAX_EXTENSION_COUNT) + { + s_extensionDeletors[idx] = deletor; + } + else + { + idx = INVALID_SLOT; + dassert (false, "allocate extension failed, not enough slots available"); + } + return idx; + } + + static uint32_t 
get_extension_count() + { + return s_nextExtensionIndex.load(); + } + +private: + uint64_t _extensions[MAX_EXTENSION_COUNT]; + static extension_deletor s_extensionDeletors[MAX_EXTENSION_COUNT]; + static std::atomic s_nextExtensionIndex; +}; + +/*! 
+\brief ExtensionHelper + +steps to use an ExtensionHelper +- implement an ExtensionHelper class, e.g. + class F : public ExtensionHelper, make sure T is an extensible_object. +- add extra information as member fields of class F. +- invoke F::register_ext() at system initialization +- use F::get(host_object) to retrieve the F object, where host_object is of type T. +- once F object is here, you can access your extra information freely. + */ + +template +class uint64_extension_helper +{ +public: + static uint32_t register_ext() + { + s_slotIdx = TExtensibleObject::register_extension(); + return s_slotIdx; + } + + static uint64_t& get(TExtensibleObject* ctx) + { + return ctx->get_extension(s_slotIdx); + } + + static void set(TExtensibleObject* ctx, uint64_t ext) + { + ctx->set_extension(s_slotIdx, ext); + } + +private: + static uint32_t s_slotIdx; +}; + +template +class object_extension_helper +{ +public: + static uint32_t register_ext(extension_deletor deletor = nullptr) + { + s_slotIdx = TExtensibleObject::register_extension(deletor); s_deletor = deletor; // remember the deletor so clear() can free the object + return s_slotIdx; + } + + static TExtension* get(TExtensibleObject* ctx) // null when the slot has not been set + { + uint64_t& val = ctx->get_extension(s_slotIdx); + return (TExtension*)val; + } + + static void set(TExtensibleObject* ctx, TExtension* ext) + { + ctx->set_extension(s_slotIdx, (uint64_t)ext); + } + + static TExtension* get_inited(TExtensibleObject* ctx) // default-constructs the extension on first access + { + uint64_t& val = ctx->get_extension(s_slotIdx); + if (val == TExtensibleObject::INVALID_VALUE) + { + TExtension* obj = new TExtension(); + val = (uint64_t)obj; + } + return (TExtension*)val; + } + + static void clear(TExtensibleObject* ctx) + { + uint64_t& val = ctx->get_extension(s_slotIdx); + if (val != TExtensibleObject::INVALID_VALUE) + { + if (s_deletor != nullptr) s_deletor ((TExtension*)val); // without a registered deletor the slot is 
only reset + val = TExtensibleObject::INVALID_VALUE; + } + } + +private: + static uint32_t s_slotIdx; + static extension_deletor s_deletor; +}; + + +//--- inline implementation ----------- +template extension_deletor 
extensible_object::s_extensionDeletors[MAX_EXTENSION_COUNT]; +template std::atomic extensible_object::s_nextExtensionIndex(0); + +template uint32_t uint64_extension_helper::s_slotIdx = 0; + +template uint32_t object_extension_helper::s_slotIdx = 0; +template extension_deletor object_extension_helper::s_deletor = nullptr; + +} // end namespace dsn diff --git a/include/dsn/internal/factory_store.h b/include/dsn/internal/factory_store.h new file mode 100644 index 0000000000..216225c95f --- /dev/null +++ b/include/dsn/internal/factory_store.h @@ -0,0 +1,147 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +# pragma once + +# include +# include + +namespace dsn { namespace utils { + +template +class factory_store +{ +public: + template + static bool register_factory(const char* name, TFactory factory, int type) + { + factory_entry entry; + entry.dummy = nullptr; + entry.factory = (void*)factory; // stored type-erased; get_factory() casts it back + entry.type = type; + return singleton_store::instance().put(std::string(name), entry); + } + + template + static TFactory get_factory(const char* name, int type) // nullptr (after report_error) when name is unknown or registered under another type + { + factory_entry entry; + if (singleton_store::instance().get(std::string(name), entry)) + { + if (entry.type != type) + { + report_error(name, type); + return nullptr; + } + else + { + TFactory f; + f = *(TFactory*)&entry.factory; + return f; + } + } + else + { + report_error(name, type); + return nullptr; + } + } + + template + static TResult* create(const char* name, int type, T1 t1, T2 t2, T3 t3, T4 t4, T5 t5) // create() overloads differ only in arity; each returns nullptr when no factory is found + { + typedef TResult* (*TFactory)(T1,T2,T3,T4,T5); + TFactory f = get_factory(name, type); + return f ? f(t1, t2, t3, t4, t5) : nullptr; + } + + template + static TResult* create(const char* name, int type, T1 t1, T2 t2, T3 t3, T4 t4) + { + typedef TResult* (*TFactory)(T1,T2,T3,T4); + TFactory f = get_factory(name, type); + return f ? f(t1, t2, t3, t4) : nullptr; + } + + template + static TResult* create(const char* name, int type, T1 t1, T2 t2, T3 t3) + { + typedef TResult* (*TFactory)(T1,T2,T3); + TFactory f = get_factory(name, type); + return f ? f(t1, t2, t3) : nullptr; + } + + template + static TResult* create(const char* name, int type, T1 t1, T2 t2) + { + typedef TResult* (*TFactory)(T1,T2); + TFactory f = get_factory(name, type); + return f ? 
f(t1, t2) : nullptr; + } + + template + static TResult* create(const char* name, int type, T1 t1) + { + typedef TResult* (*TFactory)(T1); + TFactory f = get_factory(name, type); + return f ? 
f(t1) : nullptr; + } + + static TResult* create(const char* name, int type) + { + typedef TResult* (*TFactory)(); + TFactory f = get_factory(name, type); + return f ? f() : nullptr; + } + +private: + static void report_error(const char* name, int type) // logs the failed lookup, then prints every registered factory to stdout + { + dlog(dsn::log_level_FATAL, "factory.store", "cannot find factory '%s' with factory type %s", name, type == PROVIDER_TYPE_MAIN ? "provider" : "aspect"); + + printf("cannot find factory '%s' with factory type %s\n", name, type == PROVIDER_TYPE_MAIN ? "provider" : "aspect"); + + std::vector keys; + singleton_store::instance().get_all_keys(keys); + printf ("\tthe following %u factories are registered:\n", static_cast(keys.size())); + for (auto it = keys.begin(); it != keys.end(); it++) + { + factory_entry entry; + singleton_store::instance().get(*it, entry); + printf("\t\t%s (type: %s)\n", it->c_str(), entry.type == PROVIDER_TYPE_MAIN ? "provider" : "aspect"); + } + printf ("\tPlease specify the correct factory name in your tool_app or in configuration file\n"); + } + +private: + struct factory_entry + { + TResult* dummy; // always set to nullptr by register_factory; never read in this header + void* factory; // type-erased factory function pointer + int type; // compared against the type requested in get_factory + }; +}; + +}} // end namespace dsn::utils diff --git a/include/dsn/internal/global_config.h b/include/dsn/internal/global_config.h new file mode 100644 index 0000000000..d60f3c5641 --- /dev/null +++ b/include/dsn/internal/global_config.h @@ -0,0 +1,129 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to 
use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright 
notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# pragma once + +# include +# include +# include +# include +# include + +namespace dsn { + +struct service_app_spec +{ + int id; + std::string name; + std::string type; + std::string arguments; + std::vector ports; + int delay_seconds; + bool run; + + service_app_spec() {} // NOTE(review): id, delay_seconds and run are left uninitialized here; presumably init() fills them - confirm + service_app_spec(const service_app_spec& r); + bool init(const char* section, configuration_ptr& config); +}; + +struct network_config_spec +{ + // [ key + int port; + rpc_channel channel; + // ] + + network_header_format hdr_format; + std::string factory_name; + int message_buffer_block_size; + + network_config_spec(const network_config_spec& r); + network_config_spec() : channel(RPC_CHANNEL_TCP), hdr_format(NET_HDR_DSN) {} // NOTE(review): port and message_buffer_block_size are not initialized by this constructor + network_config_spec(int p, rpc_channel c); + bool operator < (const network_config_spec& r) const; +}; + +struct network_config_spec_default +{ + std::string factory_name; + int message_buffer_block_size; +}; + +typedef std::map network_conf; // => config +typedef std::map network_default_conf; // channel => config default + +struct service_spec +{ + configuration_ptr config; // config file + + std::string tool; // the main tool (only one is allowed at a time) + std::list toollets; 
// toollets enabled compatible to the main tool + std::string coredump_dir; // to store core dump files + + network_default_conf network_default_configs; // default network configs by tools + 
std::string aio_factory_name; + std::string env_factory_name; + std::string lock_factory_name; + std::string rwlock_factory_name; + std::string semaphore_factory_name; + std::string nfs_factory_name; + std::string perf_counter_factory_name; + std::string logging_factory_name; + + std::list network_aspects; // toollets compatible to the above network main providers in network configs + std::list aio_aspects; // toollets compatible to main aio provider + std::list env_aspects; + std::list lock_aspects; + std::list rwlock_aspects; + std::list semaphore_aspects; + + network_conf network_configs; + std::vector threadpool_specs; + std::vector app_specs; + + service_spec() {} + + bool init(configuration_ptr config); + bool register_network(const network_config_spec& netcs, bool force); + bool build_network_spec(int port); +}; + +enum syste_exit_type +{ + SYS_EXIT_NORMAL, + SYS_EXIT_BREAK, // Ctrl-C/Break,Shutdown,LogOff, see SetConsoleCtrlHandler + SYS_EXIT_EXCEPTION, + + SYS_EXIT_INVALID // sentinel: passed to ENUM_BEGIN below as the invalid value +}; + +ENUM_BEGIN(syste_exit_type, SYS_EXIT_INVALID) + ENUM_REG(SYS_EXIT_NORMAL) + ENUM_REG(SYS_EXIT_BREAK) + ENUM_REG(SYS_EXIT_EXCEPTION) +ENUM_END(syste_exit_type) + +} + + diff --git a/include/dsn/internal/join_point.h b/include/dsn/internal/join_point.h new file mode 100644 index 0000000000..1db0954afb --- /dev/null +++ b/include/dsn/internal/join_point.h @@ -0,0 +1,357 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the 
following conditions: + * + * The above copyright 
namespace dsn
{

// Untyped core shared by every join_point<> instantiation.
//
// A join point is a named chain of callbacks ("advice entries"). Entries are
// kept in a circular doubly-linked list whose sentinel node is _hdr; each entry
// stores its callback as an opaque void* together with an is_native flag that
// tells the typed wrappers which function signature to call it with.
//
// All put_*/remove members are implemented out of line; only their
// declarations are visible in this header.
class join_point_base
{
public:
    join_point_base(const char* name);

    bool put_front(void* fn, const char* name, bool is_native = false);
    bool put_back(void* fn, const char* name, bool is_native = false);
    bool put_before(const char* base, void* fn, const char* name, bool is_native = false);
    bool put_after(const char* base, void* fn, const char* name, bool is_native = false);
    bool remove(const char* name);
    bool put_replace(const char* base, void* fn, const char* name);

    // Name given to this join point at construction.
    const char* name() const { return _name.c_str(); }

protected:
    struct advice_entry
    {
        std::string   name;       // name the entry was registered under
        void         *func;       // callback, type-erased
        bool          is_native;  // true: point_prototype, false: advice_prototype
        advice_entry *next;
        advice_entry *prev;
    };

    advice_entry _hdr;   // list sentinel; iteration stops when it is reached again
    std::string  _name;

private:
    advice_entry* new_entry(void* fn, const char* name, bool is_native);
    advice_entry* get_by_name(const char* name);
};

// Kept for source compatibility with code that names it explicitly; the
// variadic join_point<> below no longer needs filler arguments.
struct join_point_unused_type {};

// Typed registration layer common to all join_point<> flavours.
//
// TReturn  return type of the "native" point function
// TArgs    argument types shared by the native function and every advice
template <typename TReturn, typename... TArgs>
class join_point_typed : public join_point_base
{
public:
    typedef TReturn (*point_prototype)(TArgs...);
    typedef void (*advice_prototype)(TArgs...);

public:
    join_point_typed(const char* name) : join_point_base(name) {}

    // Registers the native implementation under the fixed name "native".
    bool put_native(point_prototype point) { return join_point_base::put_front(reinterpret_cast<void*>(point), "native", true); }

    // Typed forwards to the untyped join_point_base operations of the same name.
    bool put_front(advice_prototype fn, const char* name) { return join_point_base::put_front(reinterpret_cast<void*>(fn), name); }
    bool put_back(advice_prototype fn, const char* name) { return join_point_base::put_back(reinterpret_cast<void*>(fn), name); }
    bool put_before(const char* base, advice_prototype fn, const char* name) { return join_point_base::put_before(base, reinterpret_cast<void*>(fn), name); }
    bool put_after(const char* base, advice_prototype fn, const char* name) { return join_point_base::put_after(base, reinterpret_cast<void*>(fn), name); }
    bool put_replace(const char* base, advice_prototype fn, const char* name) { return join_point_base::put_replace(base, reinterpret_cast<void*>(fn), name); }

protected:
    // Recover the typed function pointer from the stored void*. The value is
    // converted directly instead of re-reading the void* object through a
    // function-pointer lvalue, which would violate the aliasing rules.
    static point_prototype as_point(const advice_entry* e) { return reinterpret_cast<point_prototype>(e->func); }
    static advice_prototype as_advice(const advice_entry* e) { return reinterpret_cast<advice_prototype>(e->func); }
};

// join_point<TReturn, TArgs...>: join point whose native function returns a value.
//
// execute() walks the chain in list order, calling every entry with the same
// arguments. The result of a native entry overwrites the running return value;
// advice entries return nothing. If no native entry is registered the caller's
// default_return_value is returned unchanged.
template <typename TReturn = void, typename... TArgs>
class join_point : public join_point_typed<TReturn, TArgs...>
{
    typedef join_point_typed<TReturn, TArgs...> typed_base;

public:
    join_point(const char* name) : typed_base(name) {}

    TReturn execute(TArgs... args, TReturn default_return_value)
    {
        TReturn returnValue = default_return_value;
        for (auto* p = this->_hdr.next; p != &this->_hdr; p = p->next)
        {
            if (p->is_native)
            {
                returnValue = typed_base::as_point(p)(args...);
            }
            else
            {
                typed_base::as_advice(p)(args...);
            }
        }
        return returnValue;
    }
};

// join_point<void, TArgs...>: same chain walk, but nothing is returned, so
// execute() takes no default value.
template <typename... TArgs>
class join_point<void, TArgs...> : public join_point_typed<void, TArgs...>
{
    typedef join_point_typed<void, TArgs...> typed_base;

public:
    join_point(const char* name) : typed_base(name) {}

    void execute(TArgs... args)
    {
        for (auto* p = this->_hdr.next; p != &this->_hdr; p = p->next)
        {
            if (p->is_native)
            {
                typed_base::as_point(p)(args...);
            }
            else
            {
                typed_base::as_advice(p)(args...);
            }
        }
    }
};

} // end namespace dsn
* + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
// Intrusive singly-linked list hook.
// Usage: class T : public slink<T>
//
// The hook stores only a "next" pointer; a default-constructed node has no
// successor (next() == nullptr).
template <typename T>
class slink
{
public:
    slink() : _next(nullptr) {}

    // Successor of this node, or nullptr when there is none.
    T* next() const { return _next; }

    // Links o directly after this node; the previous successor becomes o's
    // successor.
    void insert_after(T* o)
    {
        T* n = _next;
        _next = o;
        o->_next = n;
    }

    // Unlinks and returns the node following this one, or nullptr when this
    // node has no successor. The removed node's own next pointer is left
    // untouched.
    T* remove_next()
    {
        if (_next)
        {
            auto n = _next;
            _next = _next->_next;
            return n;
        }
        else
            return nullptr;
    }

private:
    T *_next;
};


// Intrusive circular doubly-linked list hook.
// Usage: class T : public dlink<T>
//
// A node that is not part of any list points at itself in both directions.
template <typename T>
class dlink
{
public:
    dlink() { _next = _prev = self(); }

    T* next() const { return _next; }
    T* prev() const { return _prev; }

    // True when this node is the only member of its ring. Comparing against
    // the node itself is required: in a two-node ring _next == _prev holds for
    // both nodes although neither is alone.
    bool is_alone() const { return _next == self(); }

    // Links o directly after this node.
    void insert_after(T* o)
    {
        auto n = _next;

        this->_next = o;
        o->_prev = self();

        o->_next = n;
        n->_prev = o;
    }

    // Links o directly before this node.
    void insert_before(T* o)
    {
        auto n = _prev;

        this->_prev = o;
        o->_next = self();

        o->_prev = n;
        n->_next = o;
    }

    // Unlinks this node from its ring and returns it. Afterwards the node is
    // alone again, so calling remove() twice or re-inserting it is safe.
    // Removing a node that is already alone is a no-op.
    T* remove()
    {
        this->_next->_prev = this->_prev;
        this->_prev->_next = this->_next;
        _next = _prev = self();
        return self();
    }

private:
    // dlink<T> has no virtual functions, so the downcast to the deriving type
    // has to be a static_cast (dynamic_cast requires a polymorphic operand).
    T* self() { return static_cast<T*>(this); }
    const T* self() const { return static_cast<const T*>(this); }

private:
    T* _next;
    T* _prev;
};
#ifdef _WIN32
__pragma(warning(disable:4127))
#endif

namespace dsn {

// Severity attached to every log record. The enumerators keep their declared
// order, so log_level_INFORMATION is the lowest value and log_level_FATAL the
// highest.
enum logging_level
{
    log_level_INFORMATION,
    log_level_DEBUG,
    log_level_WARNING,
    log_level_ERROR,
    log_level_FATAL
};

// Logging entry points, implemented elsewhere. file/function/line identify the
// call site, title is a short tag for the record, fmt is a printf-style format.
extern void logv(const char *file, const char *function, const int line, logging_level logLevel, const char* title, const char* fmt, va_list args);

extern void logv(const char *file, const char *function, const int line, logging_level logLevel, const char* title, const char* fmt, ...);

extern void logv(const char *file, const char *function, const int line, logging_level logLevel, const char* title);
} // end namespace

// dlog stamps the call site onto the record; the level-specific helpers below
// additionally use __TITLE__, which each translation unit is expected to define.
#define dlog(level, title, ...) dsn::logv(__FILE__, __FUNCTION__, __LINE__, level, title, __VA_ARGS__)
#define dinfo(...)  dlog(dsn::log_level_INFORMATION, __TITLE__, __VA_ARGS__)
#define ddebug(...) dlog(dsn::log_level_DEBUG, __TITLE__, __VA_ARGS__)
#define dwarn(...)  dlog(dsn::log_level_WARNING, __TITLE__, __VA_ARGS__)
#define derror(...) dlog(dsn::log_level_ERROR, __TITLE__, __VA_ARGS__)
#define dfatal(...) dlog(dsn::log_level_FATAL, __TITLE__, __VA_ARGS__)

// dassert(x, fmt, ...): when x is false, logs the failed expression and the
// caller's message at FATAL level, writes a core dump and aborts.
//
// The stringified expression is passed as a "%s" argument, never as the format
// string itself: an asserted expression such as `a % b == 0` or
// `strcmp(s, "%d")` would otherwise be interpreted as format directives.
// The explicit const char* type keeps the argument from matching the va_list
// overload of logv on toolchains where va_list is a plain char*.
#define dassert(x, ...) do { if (!(x)) {                    \
            dlog(dsn::log_level_FATAL, "assert", "%s", static_cast<const char*>(#x)); \
            dlog(dsn::log_level_FATAL, "assert", __VA_ARGS__); \
            ::dsn::utils::coredump::write(); ::abort(); \
        } } while (false)

// Debug-only assertion: compiled out entirely unless _DEBUG is defined, so the
// condition must not carry side effects the program depends on.
#ifdef _DEBUG
#define dbg_dassert dassert
#else
#define dbg_dassert(x, ...)
#endif
+ */ +# pragma once + +# include +# include + +namespace dsn { + +class logging_provider +{ +public: + template static logging_provider* create(const char *parameter) + { + return new T(parameter); + } + +public: + logging_provider(const char *parameter) {} + + virtual ~logging_provider(void) { } + + virtual void logv(const char *file, const char *function, const int line, logging_level logLevel, const char* title, const char* fmt, va_list args) = 0; +}; + + +// ----------------------- inline implementation --------------------------------------- +} // end namespace diff --git a/include/dsn/internal/message_parser.h b/include/dsn/internal/message_parser.h new file mode 100644 index 0000000000..16ea649ca6 --- /dev/null +++ b/include/dsn/internal/message_parser.h @@ -0,0 +1,76 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#pragma once + +# include +# include + +namespace dsn +{ + class message_parser + { + public: + template static message_parser* create(int buffer_block_size) + { + return new T(buffer_block_size); + } + + public: + message_parser(int buffer_block_size); + + // before read + void* read_buffer_ptr(int read_next); + int read_buffer_capacity() const; + + // afer read, see if we can compose a message + virtual message_ptr on_read(int read_length, __out_param int& read_next) = 0; + + // before write + virtual void get_output_buffers(message_ptr& msg, __out_param std::vector& buffers) = 0; + + protected: + void create_new_buffer(int sz); + void mark_read(int read_length); + + protected: + blob _read_buffer; + int _read_buffer_occupied; + int _buffer_block_size; + }; + + class dsn_message_parser : public message_parser + { + public: + dsn_message_parser(int buffer_block_size); + + virtual message_ptr on_read(int read_length, __out_param int& read_next); + + virtual void get_output_buffers(message_ptr& msg, __out_param std::vector& buffers) + { + return msg->writer().get_buffers(buffers); + } + }; +} \ No newline at end of file diff --git a/include/dsn/internal/network.h b/include/dsn/internal/network.h new file mode 100644 index 0000000000..3d914abc8c --- /dev/null +++ b/include/dsn/internal/network.h @@ -0,0 +1,122 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + 
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# pragma once + +# include +# include +# include + +namespace dsn { + + class rpc_engine; + class rpc_client_matcher; + class service_node; + + class network + { + public: + template static network* create(rpc_engine* srv, network* inner_provider) + { + return new T(srv, inner_provider); + } + + public: + network(rpc_engine* srv, network* inner_provider); + virtual ~network() {} + + service_node* node() const; + rpc_engine* engine() const { return _engine; } + + std::shared_ptr new_client_matcher(); + std::shared_ptr new_message_parser(); + + rpc_server_session_ptr get_server_session(const end_point& ep); + void on_server_session_accepted(rpc_server_session_ptr& s); + void on_server_session_disconnected(rpc_server_session_ptr& s); + + rpc_client_session_ptr get_client_session(const end_point& ep); + void on_client_session_disconnected(rpc_client_session_ptr& s); + + virtual error_code start(rpc_channel channel, int port, bool client_only) = 0; + virtual const end_point& address() = 0; + virtual rpc_client_session_ptr create_client_session(const end_point& server_addr) = 0; + + // used by rpc_engine only + void 
reset_parser(network_header_format name, int message_buffer_block_size); + void call(message_ptr& request, rpc_response_task_ptr& call); + + protected: + rpc_engine *_engine; + network_header_format _parser_type; + int _message_buffer_block_size; + + typedef std::map client_sessions; + client_sessions _clients; + utils::rw_lock _clients_lock; + + typedef std::map server_sessions; + server_sessions _servers; + utils::rw_lock _servers_lock; + }; + + + class rpc_client_session : public ref_object + { + public: + rpc_client_session(network& net, const end_point& remote_addr, std::shared_ptr& matcher); + bool on_recv_reply(uint64_t key, message_ptr& reply, int delay_ms); + void on_disconnected(); + void call(message_ptr& request, rpc_response_task_ptr& call); + const end_point& remote_address() const { return _remote_addr; } + + virtual void connect() = 0; + virtual void send(message_ptr& msg) = 0; + + protected: + network &_net; + end_point _remote_addr; + std::shared_ptr _matcher; + }; + + DEFINE_REF_OBJECT(rpc_client_session) + + class rpc_server_session : public ref_object + { + public: + rpc_server_session(network& net, const end_point& remote_addr); + void on_recv_request(message_ptr& msg, int delay_ms); + void on_disconnected(); + const end_point& remote_address() const { return _remote_addr; } + + virtual void send(message_ptr& reply_msg) = 0; + + protected: + network& _net; + end_point _remote_addr; + }; + + DEFINE_REF_OBJECT(rpc_server_session) +} diff --git a/include/dsn/internal/nfs.h b/include/dsn/internal/nfs.h new file mode 100644 index 0000000000..544bacbb3d --- /dev/null +++ b/include/dsn/internal/nfs.h @@ -0,0 +1,66 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without 
restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# pragma once + +# include + +namespace dsn { + + struct remote_copy_request + { + end_point source; + std::string source_dir; + std::vector files; + std::string dest_dir; + bool overwrite; + }; + + struct remote_copy_response + { + + }; + + extern void marshall(::dsn::binary_writer& writer, const remote_copy_request& val, uint16_t pos = 0xffff); + + extern void unmarshall(::dsn::binary_reader& reader, __out_param remote_copy_request& val); + + class nfs_node + { + public: + template static nfs_node* create(service_node* node) + { + return new T(node); + } + + public: + nfs_node(service_node* node) : _node(node) {} + + virtual void call(std::shared_ptr& rci, aio_task_ptr& calback) = 0; + + protected: + service_node* _node; + }; +} diff --git a/include/dsn/internal/perf_counter.h b/include/dsn/internal/perf_counter.h new file mode 100644 index 0000000000..0b5f89c682 --- /dev/null +++ b/include/dsn/internal/perf_counter.h @@ -0,0 +1,93 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * 
Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +# pragma once + +# include +# include + +namespace dsn { + +enum perf_counter_type +{ + COUNTER_TYPE_NUMBER, + COUNTER_TYPE_RATE, + COUNTER_TYPE_NUMBER_PERCENTILES, + COUNTER_TYPE_INVALID, + COUNTER_TYPE_COUNT +}; + +ENUM_BEGIN(perf_counter_type, COUNTER_TYPE_INVALID) + ENUM_REG(COUNTER_TYPE_NUMBER) + ENUM_REG(COUNTER_TYPE_RATE) + ENUM_REG(COUNTER_TYPE_NUMBER_PERCENTILES) +ENUM_END(perf_counter_type) + +enum counter_percentile_type +{ + COUNTER_PERCENTILE_50, + COUNTER_PERCENTILE_90, + COUNTER_PERCENTILE_95, + COUNTER_PERCENTILE_99, + COUNTER_PERCENTILE_999, + + COUNTER_PERCENTILE_COUNT, + COUNTER_PERCENTILE_INVALID +}; + +ENUM_BEGIN(counter_percentile_type, COUNTER_PERCENTILE_INVALID) + ENUM_REG(COUNTER_PERCENTILE_50) + ENUM_REG(COUNTER_PERCENTILE_90) + ENUM_REG(COUNTER_PERCENTILE_95) + ENUM_REG(COUNTER_PERCENTILE_99) + ENUM_REG(COUNTER_PERCENTILE_999) +ENUM_END(counter_percentile_type) + +class perf_counter; +typedef perf_counter* (*perf_counter_factory)(const char *section, const char *name, perf_counter_type type); + +class perf_counter +{ +public: + template static perf_counter* create(const char *section, const char *name, perf_counter_type type) + { + return new T(section, name, type); + } + +public: + perf_counter(const char *section, const char *name, perf_counter_type type) {} + virtual ~perf_counter(void) {} + + virtual void increment() = 0; + virtual void decrement() = 0; + virtual void add(uint64_t val) = 0; + virtual void set(uint64_t val) = 0; + virtual double get_value() = 0; + virtual double get_percentile(counter_percentile_type type) = 0; +}; + +typedef std::shared_ptr perf_counter_ptr; + +} // end namespace diff --git a/include/dsn/internal/perf_counters.h b/include/dsn/internal/perf_counters.h new file mode 100644 index 0000000000..b15f9f5916 --- /dev/null +++ b/include/dsn/internal/perf_counters.h @@ -0,0 +1,74 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus 
(rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */
+# pragma once
+
+# include
+# include
+# include
+# include
+
+namespace dsn { namespace utils {
+// Registry of perf_counter objects addressed by (section, name); derives from dsn::utils::singleton.
+class perf_counters : public dsn::utils::singleton
+{
+public:
+    perf_counters(void);
+    ~perf_counters(void);
+    // Looks up the counter for (section, name). Defined out of line; what is returned when the counter is absent and create_if_not_exist is false is not visible here.
+    perf_counter_ptr get_counter(
+        const char *section,
+        const char *name,
+        perf_counter_type flags,
+        bool create_if_not_exist = false
+        );
+    // Removes the counter for (section, name); the meaning of the bool result is defined in the out-of-line implementation.
+    bool remove_counter(const char* section, const char* name);
+    // Convenience overload: same lookup with the section fixed to "dsn".
+    perf_counter_ptr get_counter(
+        const char *name,
+        perf_counter_type flags,
+        bool create_if_not_exist = false)
+    {
+        return get_counter("dsn", name, flags, create_if_not_exist);
+    }
+    // Convenience overload: removal with the section fixed to "dsn".
+    bool remove_counter(const char* name)
+    {
+        return remove_counter("dsn", name);
+    }
+    // Installs the factory; presumably what get_counter uses to build missing counters (confirm in the implementation).
+    void register_factory(perf_counter_factory factory);
+
+private:
+    typedef std::map > same_section_counters;
+    typedef std::map all_counters;
+
+    mutable utils::rw_lock _lock; // mutable, so it can also be taken inside const members
+    all_counters _counters;
+    perf_counter_factory _factory;
+};
+
+}} // end namespace dsn::utils
diff --git a/include/dsn/internal/priority_queue.h b/include/dsn/internal/priority_queue.h
new file mode 100644
index 0000000000..78d6ffd40b
--- /dev/null
+++ b/include/dsn/internal/priority_queue.h
@@ -0,0 +1,192 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015 Microsoft Corporation
+ *
+ * -=- Robust Distributed System Nucleus (rDSN) -=-
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#pragma once + +#include +#include +#include +#include +#include + +namespace dsn { namespace utils { + +template> +class priority_queue +{ +public: + priority_queue(const std::string& name) + { + _name = name; + _count = 0; + _peeked_item = nullptr; + } + + virtual long enqueue(T obj, uint32_t priority) + { + dassert (priority >= 0 && priority < priority_count, "wrong priority"); + + std::lock_guard l(_lock); + { + _items[priority].push(obj); + return ++_count; + } + } + + virtual T peek() + { + std::lock_guard l(_lock); + + // already peeked + if (nullptr != _peeked_item) + return nullptr; + + else + { + long ct = 0; + _peeked_item = dequeue_impl(ct); + return _peeked_item; + } + } + + virtual T dequeue_peeked() + { + std::lock_guard l(_lock); + auto c = _peeked_item; + _peeked_item = nullptr; + return c; + } + + bool is_peeked() + { + std::lock_guard l(_lock); + return _peeked_item != nullptr; + } + + virtual T dequeue() + { + std::lock_guard l(_lock); + long ct = 0; + return dequeue_impl(ct); + } + + virtual T dequeue(__out_param long& ct) + { + std::lock_guard l(_lock); + return dequeue_impl(ct); + } + + const std::string& get_name() const { return _name;} + + long count() const { std::lock_guard l(_lock); return _count; } + +protected: + T dequeue_impl(__out_param long& ct, bool pop = true) + { + if (_count == 0) + { + return nullptr; + } + + ct = --_count; + + int index = priority_count - 1; + for (; index >= 0; index--) + { + if (_items[index].size() > 0) + { + break; 
+ } + } + + dassert (index >= 0, "must find something"); + auto c = _items[index].front(); + _items[index].pop(); + return c; + } + +protected: + std::string _name; + T _peeked_item; + TQueue _items[priority_count]; + long _count; + mutable std::mutex _lock; +}; + +template> +class blocking_priority_queue : public priority_queue +{ +public: + blocking_priority_queue(const std::string& name) + : priority_queue(name) + { + _wait_count = 0; + } + + virtual long enqueue(T obj, uint32_t priority) + { + long r; + std::lock_guard l(priority_queue::_lock); + + priority_queue::_items[priority].push(obj); + r = ++priority_queue::_count; + + if (_wait_count > 0) + { + _cond.notify_one(); + } + + return r; + } + + virtual T dequeue(__out_param long ct, int millieseconds = TIME_MS_MAX) + { + std::unique_lock l(priority_queue::_lock); + + if (priority_queue::_count > 0) + { + return priority_queue::dequeue_impl(ct); + } + + ++_wait_count; + if (millieseconds == TIME_MS_MAX) + { + _cond.wait(l); + } + else + { + _cond.wait_for(l, std::chrono::milliseconds(millieseconds)); + } + --_wait_count; + + return priority_queue::dequeue_impl(ct); + } + +private: + std::condition_variable _cond; + int _wait_count; +}; + +}} // end namespace diff --git a/include/dsn/internal/rpc_message.h b/include/dsn/internal/rpc_message.h new file mode 100644 index 0000000000..6c5d99c32c --- /dev/null +++ b/include/dsn/internal/rpc_message.h @@ -0,0 +1,132 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do 
so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+# pragma once
+
+# include
+# include
+# include
+# include
+# include
+# include
+
+namespace dsn {
+// Header carried with every RPC message. Fields declared before from_address form the serialized part (see serialized_size()).
+struct message_header
+{
+    int32_t hdr_crc32;
+    int32_t body_crc32;
+    int32_t body_length;
+    int32_t version;
+    uint64_t id;
+    uint64_t rpc_id;
+    char rpc_name[MAX_TASK_CODE_NAME_LENGTH + 1];
+
+    // info from client => server
+    union // client and server share storage: a header carries one or the other
+    {
+        struct
+        {
+            uint64_t timeout_ts_us; // target timeout stamp in us
+            int32_t hash;
+            uint16_t port;
+        } client;
+
+        struct
+        {
+            int32_t error;
+        } server;
+    };
+
+    // local fields - no need to be transmitted
+    end_point from_address;
+    end_point to_address;
+    uint16_t local_rpc_code;
+    // Byte offset of from_address, i.e. the size of everything declared before the local-only fields.
+    static int serialized_size()
+    {
+        return static_cast(FIELD_OFFSET(message_header, from_address));
+    }
+
+    void marshall(binary_writer& writer);
+    void unmarshall(binary_reader& reader);
+    void new_rpc_id();
+
+    static bool is_right_header(char* hdr);
+    static int get_body_length(char* hdr) // reads body_length straight out of a raw buffer; no validation happens here
+    {
+        return ((message_header*)hdr)->body_length;
+    }
+};
+// A request or response: the header plus either a reader or a writer over the payload.
+class rpc_server_session;
+class message : public ref_object, public extensible_object
+{
+public:
+    message(); // write
+    message(blob bb, bool parse_hdr = true); // read
+    virtual ~message();
+
+    //
+    // routines for request and response
+    //
+    static message_ptr create_request(task_code rpc_code, int timeout_milliseconds = 0, int hash = 0);
+    message_ptr create_response();
+
+    //
+    // routines for reader & writer
+    //
+    binary_reader& reader() { return *_reader; } // dereferences without a null check; _reader is null unless is_read()
+    binary_writer& writer() { return *_writer; } // dereferences without a null check; presumably only valid on a message built for writing (confirm in the constructors)
+
+    //
+    // meta info
+    //
+    void seal(bool fillCrc, bool is_placeholder = false);
+    message_header& header() { return _msg_header; }
+    int total_size() const { return is_read() ? _reader->total_size() : _writer->total_size(); }
+    bool is_read() const { return _reader != nullptr; }
+    error_code error() const { error_code ec; ec.set(_msg_header.server.error); return ec; } // wraps the header's server.error field
+    bool is_right_header() const;
+    bool is_right_body() const;
+    static uint64_t new_id() { return ++_id; } // pre-increments the shared static counter
+    rpc_server_session_ptr& server_session() { return _server_session; }
+
+private:
+    void read_header();
+
+private:
+    message_header _msg_header;
+    binary_reader *_reader;
+    binary_writer *_writer;
+    rpc_server_session_ptr _server_session;
+
+protected:
+    static std::atomic _id;
+};
+
+DEFINE_REF_OBJECT(message)
+
+} // end namespace
diff --git a/include/dsn/internal/serialization.h b/include/dsn/internal/serialization.h
new file mode 100644
index 0000000000..c93540eb21
--- /dev/null
+++ b/include/dsn/internal/serialization.h
@@ -0,0 +1,188 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015 Microsoft Corporation
+ *
+ * -=- Robust Distributed System Nucleus (rDSN) -=-
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# pragma once + +# include +# include +# include +# include +# include + +// pod types +#define DEFINE_POD_SERIALIZATION(T) \ + inline void marshall(::dsn::binary_writer& writer, const T& val, uint16_t pos = 0xffff)\ + {\ + writer.write((const char*)&val, static_cast(sizeof(val)), pos); \ + }\ + inline void unmarshall(::dsn::binary_reader& reader, __out_param T& val)\ + {\ + reader.read((char*)&val, static_cast(sizeof(T))); \ + } + +namespace dsn { + + template + inline void marshall(::dsn::message_ptr& writer, const T& val, uint16_t pos = 0xffff) + { + marshall(writer->writer(), val, pos); + } + + template + inline void unmarshall(::dsn::message_ptr& reader, __out_param T& val) + { + unmarshall(reader->reader(), val); + } + +#ifndef DSN_NOT_USE_DEFAULT_SERIALIZATION + + DEFINE_POD_SERIALIZATION(bool) + DEFINE_POD_SERIALIZATION(char) + //DEFINE_POD_SERIALIZATION(size_t) + DEFINE_POD_SERIALIZATION(float) + DEFINE_POD_SERIALIZATION(double) + DEFINE_POD_SERIALIZATION(int8_t) + DEFINE_POD_SERIALIZATION(uint8_t) + DEFINE_POD_SERIALIZATION(int16_t) + DEFINE_POD_SERIALIZATION(uint16_t) + DEFINE_POD_SERIALIZATION(int32_t) + DEFINE_POD_SERIALIZATION(uint32_t) + DEFINE_POD_SERIALIZATION(int64_t) + DEFINE_POD_SERIALIZATION(uint64_t) + + // error_code + inline void marshall(::dsn::binary_writer& writer, const error_code& val, uint16_t pos = 0xffff) + { + int err = val.get(); + marshall(writer, err, pos); + } + + inline void unmarshall(::dsn::binary_reader& reader, __out_param error_code& 
val) + { + int err; + unmarshall(reader, err); + val.set(err); + } + + + // std::string + inline void marshall(::dsn::binary_writer& writer, const std::string& val, uint16_t pos = 0xffff) + { + writer.write(val, pos); + } + + inline void unmarshall(::dsn::binary_reader& reader, __out_param std::string& val) + { + reader.read(val); + } + + // end point + //extern inline void marshall(::dsn::binary_writer& writer, const end_point& val, uint16_t pos = 0xffff); + //extern inline void unmarshall(::dsn::binary_reader& reader, __out_param end_point& val); + + // blob + inline void marshall(::dsn::binary_writer& writer, const blob& val, uint16_t pos = 0xffff) + { + writer.write(val, pos); + } + + inline void unmarshall(::dsn::binary_reader& reader, __out_param blob& val) + { + reader.read(val); + } + + // for generic list + template + inline void marshall(::dsn::binary_writer& writer, const std::list& val, uint16_t pos = 0xffff) + { + int sz = static_cast(val.size()); + marshall(writer, sz, pos); + for (auto& v : val) + { + marshall(writer, v, pos); + } + } + + template + inline void unmarshall(::dsn::binary_reader& reader, __out_param std::list& val) + { + int sz; + unmarshall(reader, sz); + val.resize(sz); + for (auto& v : val) + { + unmarshall(reader, v); + } + } + + // for generic vector + template + inline void marshall(::dsn::binary_writer& writer, const std::vector& val, uint16_t pos = 0xffff) + { + int sz = static_cast(val.size()); + marshall(writer, sz, pos); + for (auto& v : val) + { + marshall(writer, v, pos); + } + } + + template + inline void unmarshall(::dsn::binary_reader& reader, __out_param std::vector& val) + { + int sz; + unmarshall(reader, sz); + val.resize(sz); + for (auto& v : val) + { + unmarshall(reader, v); + } + } + + // for generic set + template + inline void marshall(::dsn::binary_writer& writer, const std::set, std::allocator>& val, uint16_t pos = 0xffff) + { + int sz = static_cast(val.size()); + marshall(writer, sz, pos); + for (auto& v : 
val) + { + marshall(writer, v, pos); + } + } + + template + inline void unmarshall(::dsn::binary_reader& reader, __out_param std::set, std::allocator>& val) + { + int sz; + unmarshall(reader, sz); + val.resize(sz); + for (auto& v : val) + { + unmarshall(reader, v); + } + } +#endif +} diff --git a/include/dsn/internal/service.api.oo.h b/include/dsn/internal/service.api.oo.h new file mode 100644 index 0000000000..8028569972 --- /dev/null +++ b/include/dsn/internal/service.api.oo.h @@ -0,0 +1,293 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */
+# pragma once
+
+# include
+
+namespace dsn {
+    namespace service {
+
+        namespace tasking
+        {
+            task_ptr enqueue(
+                task_code evt,
+                servicelet *context,
+                task_handler callback,
+                int hash = 0,
+                int delay_milliseconds = 0,
+                int timer_interval_milliseconds = 0
+                );
+            // Member-function convenience: binds callback to owner and forwards to the overload above, passing owner as the context.
+            template // where T : public virtual servicelet
+            inline task_ptr enqueue(
+                task_code evt,
+                T* owner,
+                void (T::*callback)(),
+                int hash = 0,
+                int delay_milliseconds = 0,
+                int timer_interval_milliseconds = 0
+                )
+            {
+                task_handler h = std::bind(callback, owner);
+                return enqueue(
+                    evt,
+                    owner,
+                    h,
+                    hash,
+                    delay_milliseconds,
+                    timer_interval_milliseconds
+                    );
+            }
+        }
+
+        namespace rpc
+        {
+            //
+            // for TRequest/TResponse, we assume that the following routines are defined:
+            // marshall(binary_writer& writer, const T& val);
+            // unmarshall(binary_reader& reader, __out_param T& val);
+            // either in the namespace of ::dsn::utils or T
+            // developers may write these helper functions by their own, or use tools
+            // such as protocol-buffer, thrift, or bond to generate these functions automatically
+            // for their TRequest and TResponse
+            //
+            // NOTE(review): template parameter lists and std::function signatures below are incomplete in this copy of the file; compare with service.api.oo.impl.h.
+            // no callback
+            template
+            void call_one_way_typed(
+                const end_point& server,
+                task_code code,
+                const TRequest& req,
+                int hash = 0
+                );
+
+            // callback type 1:
+            // void (T::*callback)(error_code, std::shared_ptr<TRequest>&, std::shared_ptr<TResponse>&)
+            template
+            rpc_response_task_ptr call_typed(
+                const end_point& server,
+                task_code code,
+                std::shared_ptr& req,
+                T* owner,
+                void (T::*callback)(error_code, std::shared_ptr&, std::shared_ptr&),
+                int request_hash = 0,
+                int timeout_milliseconds = 0,
+                int reply_hash = 0
+                );
+
+            // callback type 2:
+            // a std::function taking (error_code, std::shared_ptr<TRequest>&, std::shared_ptr<TResponse>&)
+            template
+            rpc_response_task_ptr call_typed(
+                const end_point& server,
+                task_code code,
+                std::shared_ptr& req,
+                servicelet* owner,
+                std::function&, std::shared_ptr&)> callback,
+                int request_hash = 0,
+                int timeout_milliseconds = 0,
+                int reply_hash = 0
+                );
+
+            // callback type 5
+            // void (T::*)(error_code, const TResponse&, void*);
+            template
+            inline rpc_response_task_ptr call_typed(
+                const end_point& server,
+                task_code code,
+                const TRequest& req,
+                T* owner,
+                void(T::*callback)(error_code, const TResponse&, void*),
+                void* context,
+                int request_hash = 0,
+                int timeout_milliseconds = 0,
+                int reply_hash = 0
+                );
+
+            // callback type 3:
+            // a std::function taking (error_code, const TResponse&, void*)
+            template
+            rpc_response_task_ptr call_typed(
+                const end_point& server,
+                task_code code,
+                const TRequest& req,
+                servicelet* owner,
+                std::function callback,
+                void* context,
+                int request_hash = 0,
+                int timeout_milliseconds = 0,
+                int reply_hash = 0
+                );
+
+            // callback type 4:
+            // a std::function taking (error_code, message_ptr& request, message_ptr& response); untyped, works on raw messages
+            rpc_response_task_ptr call(
+                const end_point& server,
+                message_ptr& request,
+                servicelet* owner,
+                std::function callback,
+                int reply_hash = 0
+                );
+
+            // multiple rpc layered using the same request and response message
+            // callback type 6 (this comment previously reused the number 5):
+            // a std::function returning bool and taking (error_code, std::shared_ptr<TRequest>&, std::shared_ptr<TResponse>&)
+            // return true when the system need to continue the next callback
+            class layered_rpc : public rpc_response_task, public service_context_manager
+            {
+            public:
+                layered_rpc(servicelet* owner, message_ptr& request, int hash = 0);
+                virtual ~layered_rpc();
+                // Creates the layered call and registers its first handler (defined in the impl header).
+                template
+                static layered_rpc& first(
+                    task_code code,
+                    std::shared_ptr& req,
+                    servicelet* owner,
+                    std::function&, std::shared_ptr&)> callback,
+                    int request_hash = 0,
+                    int timeout_milliseconds = 0,
+                    int reply_hash = 0
+                    );
+                // Adds one more handler; returns layered_rpc& so calls can be chained.
+                template
+                layered_rpc& append(
+                    std::shared_ptr& req,
+                    std::function&, std::shared_ptr&)> callback
+                    );
+
+                rpc_response_task_ptr call(const end_point& server);
+
+                virtual void exec();
+                virtual void on_response(error_code err, message_ptr& request, message_ptr& response) {} // intentionally empty here
+
+            public:
+                class layered_rpc_handler // type-erased handler; the typed implementation lives in the impl header
+                {
+                public:
+                    virtual bool exec(
+                        error_code err,
+                        message_ptr& response) = 0;
+                    virtual ~layered_rpc_handler() {}
+                };
+                std::list _handlers;
+            };
+        }
+
+        namespace file
+        {
+            aio_task_ptr read(
+                handle_t hFile,
+                char* buffer,
+                int count,
+                uint64_t offset,
+                task_code callback_code,
+                servicelet* owner,
+                aio_handler callback,
+                int hash = 0
+                );
+
+            aio_task_ptr write(
+                handle_t hFile,
+                const char* buffer,
+                int count,
+                uint64_t offset,
+                task_code callback_code,
+                servicelet* owner,
+                aio_handler callback,
+                int hash = 0
+                );
+            // Member-function convenience: binds callback(error_code, uint32_t) to owner and forwards to read() above.
+            template
+            inline aio_task_ptr read(
+                handle_t hFile,
+                char* buffer,
+                int count,
+                uint64_t offset,
+                task_code callback_code,
+                T* owner,
+                void(T::*callback)(error_code, uint32_t),
+                int hash = 0
+                )
+            {
+                aio_handler h = std::bind(callback, owner, std::placeholders::_1, std::placeholders::_2);
+                return read(hFile, buffer, count, offset, callback_code, owner, h, hash);
+            }
+            // Member-function convenience: binds callback(error_code, uint32_t) to owner and forwards to write() above.
+            template
+            inline aio_task_ptr write(
+                handle_t hFile,
+                const char* buffer,
+                int count,
+                uint64_t offset,
+                task_code callback_code,
+                T* owner,
+                void(T::*callback)(error_code, uint32_t),
+                int hash = 0
+                )
+            {
+                aio_handler h = std::bind(callback, owner, std::placeholders::_1, std::placeholders::_2);
+                return write(hFile, buffer, count, offset, callback_code, owner, h, hash);
+            }
+
+            aio_task_ptr copy_remote_files(
+                const end_point& remote,
+                std::string& source_dir,
+                std::vector& files, // empty for all
+                std::string& dest_dir,
+                bool overwrite,
+                task_code callback_code,
+                servicelet* owner,
+                aio_handler callback,
+                int hash = 0
+                );
+            // Whole-directory copy: calls copy_remote_files with an empty file list, which that function documents as "all".
+            inline aio_task_ptr copy_remote_directory(
+                const end_point& remote,
+                std::string& source_dir,
+                std::string& dest_dir,
+                bool overwrite,
+                task_code callback_code,
+                servicelet* owner,
+                aio_handler callback,
+                int hash = 0
+                )
+            {
+                std::vector files;
+                return copy_remote_files(
+                    remote, source_dir, files, dest_dir, overwrite,
+                    callback_code, owner, callback, hash
+                    );
+            }
+        }
+
+    } // end namespace service
+} // end namespace
+
+# include
+
+
+
diff --git a/include/dsn/internal/service.api.oo.impl.h b/include/dsn/internal/service.api.oo.impl.h
new file mode 100644
index
0000000000..193015d998 --- /dev/null +++ b/include/dsn/internal/service.api.oo.impl.h @@ -0,0 +1,475 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */
+# pragma once
+
+namespace dsn {
+    namespace service
+    {
+        namespace rpc
+        {
+            namespace internal_use_only
+            {
+                // Response task for callback type 1: a member function of T that
+                // receives the original request and the decoded response.
+                // On failure the callback gets a null response pointer.
+                template<typename T, typename TRequest, typename TResponse>
+                class service_rpc_response_task1 : public rpc_response_task, public service_context_manager
+                {
+                public:
+                    service_rpc_response_task1(
+                        T* svc,
+                        std::shared_ptr<TRequest>& req,
+                        void (T::*callback)(error_code, std::shared_ptr<TRequest>&, std::shared_ptr<TResponse>&),
+                        message_ptr& request,
+                        int hash = 0
+                        )
+                        : rpc_response_task(request, hash), service_context_manager(svc, this)
+                    {
+                        _svc = svc;
+                        _req = req;
+                        _callback = callback;
+                    }
+
+                    virtual void on_response(error_code err, message_ptr& request, message_ptr& response)
+                    {
+                        if (err == ERR_SUCCESS)
+                        {
+                            std::shared_ptr<TResponse> resp(new TResponse);
+                            unmarshall(response->reader(), *resp);
+                            (_svc->*_callback)(err, _req, resp);
+                        }
+                        else
+                        {
+                            std::shared_ptr<TResponse> resp(nullptr);
+                            (_svc->*_callback)(err, _req, resp);
+                        }
+                    }
+
+                private:
+                    T* _svc;
+                    std::shared_ptr<TRequest> _req;
+                    void (T::*_callback)(error_code, std::shared_ptr<TRequest>&, std::shared_ptr<TResponse>&);
+                };
+
+                // Response task for callback type 2: same contract as task1 but
+                // with a std::function, which is invoked at most once and then cleared.
+                template<typename TRequest, typename TResponse>
+                class service_rpc_response_task2 : public rpc_response_task, public service_context_manager
+                {
+                public:
+                    service_rpc_response_task2(
+                        servicelet* svc,
+                        std::shared_ptr<TRequest>& req,
+                        std::function<void(error_code, std::shared_ptr<TRequest>&, std::shared_ptr<TResponse>&)>& callback,
+                        message_ptr& request,
+                        int hash = 0
+                        )
+                        : rpc_response_task(request, hash), service_context_manager(svc, this)
+                    {
+                        _req = req;
+                        _callback = callback;
+                    }
+
+                    virtual void on_response(error_code err, message_ptr& request, message_ptr& response)
+                    {
+                        if (nullptr != _callback)
+                        {
+                            if (err == ERR_SUCCESS)
+                            {
+                                std::shared_ptr<TResponse> resp(new TResponse);
+                                unmarshall(response->reader(), *resp);
+                                _callback(err, _req, resp);
+                            }
+                            else
+                            {
+                                std::shared_ptr<TResponse> resp(nullptr);
+                                _callback(err, _req, resp);
+                            }
+                            _callback = nullptr;
+                        }
+                    }
+
+                private:
+                    std::shared_ptr<TRequest> _req;
+                    std::function<void(error_code, std::shared_ptr<TRequest>&, std::shared_ptr<TResponse>&)> _callback;
+                };
+
+                // Response task for callback type 3: std::function receiving the
+                // response by const reference plus an opaque context pointer.
+                // On failure the callback gets a default-constructed TResponse.
+                // NOTE(review): the template parameter list was not recoverable
+                // from this copy; only TResponse is used by the class body.
+                template<typename TResponse>
+                class service_rpc_response_task3 : public rpc_response_task, public service_context_manager
+                {
+                public:
+                    service_rpc_response_task3(
+                        servicelet* svc,
+                        std::function<void(error_code, const TResponse&, void*)>& callback,
+                        void* context,
+                        message_ptr& request,
+                        int hash = 0
+                        )
+                        : rpc_response_task(request, hash), service_context_manager(svc, this)
+                    {
+                        _callback = callback;
+                        _context = context;
+                    }
+
+                    virtual void on_response(error_code err, message_ptr& request, message_ptr& response)
+                    {
+                        if (nullptr != _callback)
+                        {
+                            TResponse resp;
+                            if (err == ERR_SUCCESS)
+                            {
+                                unmarshall(response->reader(), resp);
+                                _callback(err, resp, _context);
+                            }
+                            else
+                            {
+                                _callback(err, resp, _context);
+                            }
+                            _callback = nullptr;
+                        }
+                    }
+
+                private:
+                    std::function<void(error_code, const TResponse&, void*)> _callback;
+                    void* _context;
+                };
+
+                // Response task for callback type 4: untyped, hands the raw
+                // request and response messages to the callback.
+                class service_rpc_response_task4 : public rpc_response_task, public service_context_manager
+                {
+                public:
+                    service_rpc_response_task4(
+                        servicelet* svc,
+                        std::function<void(error_code, message_ptr&, message_ptr&)>& callback,
+                        message_ptr& request,
+                        int hash = 0
+                        )
+                        : rpc_response_task(request, hash), service_context_manager(svc, this)
+                    {
+                        _callback = callback;
+                    }
+
+                    virtual void on_response(error_code err, message_ptr& request, message_ptr& response)
+                    {
+                        if (nullptr != _callback)
+                        {
+                            _callback(err, request, response);
+                            _callback = nullptr;
+                        }
+                    }
+
+                private:
+                    std::function<void(error_code, message_ptr&, message_ptr&)> _callback;
+                };
+
+
+                // Response task for callback type 5: member-function variant of task3.
+                // NOTE(review): the template parameter list was not recoverable
+                // from this copy; only T and TResponse are used by the class body.
+                template<typename T, typename TResponse>
+                class service_rpc_response_task5 : public rpc_response_task, public service_context_manager
+                {
+                public:
+                    service_rpc_response_task5(
+                        T* svc,
+                        void (T::*callback)(error_code, const TResponse&, void*),
+                        void* context,
+                        message_ptr& request,
+                        int hash = 0
+                        )
+                        : rpc_response_task(request, hash), service_context_manager(svc, this)
+                    {
+                        _svc = svc;
+                        _callback = callback;
+                        _context = context;
+                    }
+
+                    virtual void on_response(error_code err, message_ptr& request, message_ptr& response)
+                    {
+                        TResponse resp;
+                        if (err == ERR_SUCCESS)
+                        {
+                            unmarshall(response->reader(), resp);
+                            (_svc->*_callback)(err, resp, _context);
+                        }
+                        else
+                        {
+                            (_svc->*_callback)(err, resp, _context);
+                        }
+                    }
+
+                private:
+                    T* _svc;
+                    void* _context;
+                    void (T::*_callback)(error_code, const TResponse&, void*);
+                };
+            }
+
+            // ------------- inline implementation ----------------
+            // Each call_typed below builds the request message, marshalls the
+            // typed request into it, wraps the callback in the matching response
+            // task and hands both to rpc::call.
+            template<typename TRequest>
+            inline void call_one_way_typed(
+                const end_point& server,
+                task_code code,
+                const TRequest& req,
+                int hash
+                )
+            {
+                message_ptr msg = message::create_request(code, 0, hash);
+                marshall(msg->writer(), req);
+                rpc::call_one_way(server, msg);
+            }
+
+            template<typename T, typename TRequest, typename TResponse>
+            inline rpc_response_task_ptr call_typed(
+                const end_point& server,
+                task_code code,
+                std::shared_ptr<TRequest>& req,
+                T* owner,
+                void (T::*callback)(error_code, std::shared_ptr<TRequest>&, std::shared_ptr<TResponse>&),
+                int request_hash/* = 0*/,
+                int timeout_milliseconds /*= 0*/,
+                int reply_hash /*= 0*/
+                )
+            {
+                message_ptr msg = message::create_request(code, timeout_milliseconds, request_hash);
+                marshall(msg->writer(), *req);
+
+                rpc_response_task_ptr resp_task(new internal_use_only::service_rpc_response_task1<T, TRequest, TResponse>(
+                    owner,
+                    req,
+                    callback,
+                    msg,
+                    reply_hash
+                    ));
+
+                return rpc::call(server, msg, resp_task);
+            }
+
+            template<typename TRequest, typename TResponse>
+            inline rpc_response_task_ptr call_typed(
+                const end_point& server,
+                task_code code,
+                std::shared_ptr<TRequest>& req,
+                servicelet* owner,
+                std::function<void(error_code, std::shared_ptr<TRequest>&, std::shared_ptr<TResponse>&)> callback,
+                int request_hash/* = 0*/,
+                int timeout_milliseconds /*= 0*/,
+                int reply_hash /*= 0*/
+                )
+            {
+                message_ptr msg = message::create_request(code, timeout_milliseconds, request_hash);
+                marshall(msg->writer(), *req);
+
+                rpc_response_task_ptr resp_task(new internal_use_only::service_rpc_response_task2<TRequest, TResponse>(
+                    owner,
+                    req,
+                    callback,
+                    msg,
+                    reply_hash
+                    ));
+
+                return rpc::call(server, msg, resp_task);
+            }
+
+            template<typename T, typename TRequest, typename TResponse>
+            inline rpc_response_task_ptr call_typed(
+                const end_point& server,
+                task_code code,
+                const TRequest& req,
+                T* owner,
+                void(T::*callback)(error_code, const TResponse&, void*),
+                void* context,
+                int request_hash/* = 0*/,
+                int timeout_milliseconds /*= 0*/,
+                int reply_hash /*= 0*/
+                )
+            {
+                message_ptr msg = message::create_request(code, timeout_milliseconds, request_hash);
+                marshall(msg->writer(), req);
+
+                rpc_response_task_ptr resp_task(new internal_use_only::service_rpc_response_task5<T, TResponse>(
+                    owner,
+                    callback,
+                    context,
+                    msg,
+                    reply_hash
+                    ));
+
+                return rpc::call(server, msg, resp_task);
+            }
+
+            template<typename TRequest, typename TResponse>
+            inline rpc_response_task_ptr call_typed(
+                const end_point& server,
+                task_code code,
+                const TRequest& req,
+                servicelet* owner,
+                std::function<void(error_code, const TResponse&, void*)> callback,
+                void* context,
+                int request_hash/* = 0*/,
+                int timeout_milliseconds /*= 0*/,
+                int reply_hash /*= 0*/
+                )
+            {
+                message_ptr msg = message::create_request(code, timeout_milliseconds, request_hash);
+                marshall(msg->writer(), req);
+
+                rpc_response_task_ptr resp_task(new internal_use_only::service_rpc_response_task3<TResponse>(
+                    owner,
+                    callback,
+                    context,
+                    msg,
+                    reply_hash
+                    ));
+
+                return rpc::call(server, msg, resp_task);
+            }
+
+            // Typed handler for one layer of a layered_rpc. The callback's bool
+            // result is what exec() returns; per the layered_rpc contract, true
+            // means the next handler should run.
+            template<typename TRequest, typename TResponse>
+            class layered_rpc_handler_typed : public layered_rpc::layered_rpc_handler
+            {
+            public:
+                layered_rpc_handler_typed(
+                    std::shared_ptr<TRequest>& req,
+                    std::function<bool(error_code, std::shared_ptr<TRequest>&, std::shared_ptr<TResponse>&)> callback
+                    )
+                {
+                    _req = req;
+                    _callback = callback;
+                }
+
+                // Every path now returns a value; previously control fell off the
+                // end of this bool function. A failed call is reported to the
+                // callback with a null response, matching the other response
+                // tasks in this header, and the callback decides whether the
+                // chain continues.
+                virtual bool exec(
+                    error_code err,
+                    message_ptr& response)
+                {
+                    if (err == ERR_SUCCESS)
+                    {
+                        auto r = std::shared_ptr<TResponse>(new TResponse);
+                        unmarshall(response->reader(), *r);
+                        return _callback(err, _req, r);
+                    }
+                    else
+                    {
+                        std::shared_ptr<TResponse> r(nullptr);
+                        return _callback(err, _req, r);
+                    }
+                }
+
+                virtual ~layered_rpc_handler_typed() { }
+
+            private:
+                std::shared_ptr<TRequest> _req;
+                std::function<bool(error_code, std::shared_ptr<TRequest>&, std::shared_ptr<TResponse>&)> _callback;
+            };
+
+            inline layered_rpc::layered_rpc(servicelet* owner, message_ptr& request, int hash)
+                :
+                rpc_response_task(request, hash),
+                service_context_manager(owner, this)
+            {
+            }
+
+            template
+            inline /*static*/ layered_rpc& layered_rpc::first(
+                task_code code,
+                std::shared_ptr& req,
+                servicelet* owner,
+                std::function&, std::shared_ptr&)> callback,
+                int request_hash,
+                int timeout_milliseconds,
+                int reply_hash
+                )
+            {
+                message_ptr request = message::create_request(code, timeout_milliseconds, request_hash);
+                layered_rpc *lr = new
layered_rpc(owner, request, reply_hash); + + auto h = new layered_rpc_handler_typed(req, callback); + lr->_handlers.push_back(h); + + return *lr; + } + + template + inline layered_rpc& layered_rpc::append( + std::shared_ptr& req, + std::function&, std::shared_ptr&)> callback + ) + { + auto h = new layered_rpc_handler_typed(req, callback); + _handlers.push_back(h); + } + + inline void layered_rpc::exec() + { + bool c = true; + for (auto& h : _handlers) + { + if (c) + { + c = h->exec(error(), get_response()); + } + + delete h; + } + _handlers.clear(); + } + + inline rpc_response_task_ptr layered_rpc::call(const end_point& server) + { + dassert(_handlers.size() > 0, ""); + + auto cb = rpc_response_task_ptr(static_cast(this)); + return rpc::call(server, cb->get_request(), cb); + } + + inline layered_rpc::~layered_rpc() + { + for (auto& h : _handlers) + delete h; + _handlers.clear(); + } + } // end namespace rpc + + namespace file + { + namespace internal_use_only + { + class service_aio_task : public aio_task, public service_context_manager + { + public: + service_aio_task(task_code code, servicelet* svc, aio_handler& handler, int hash = 0) + : aio_task(code, hash), service_context_manager(svc, this) + { + _handler = handler; + } + + virtual void on_completed(error_code err, uint32_t transferred_size) + { + if (_handler != nullptr) + { + _handler(err, transferred_size); + _handler = nullptr; + } + } + + private: + aio_handler _handler; + }; + + } + } + + } // end namespace service +} // end namespace + + + diff --git a/include/dsn/internal/service_app.h b/include/dsn/internal/service_app.h new file mode 100644 index 0000000000..3bf8d5d661 --- /dev/null +++ b/include/dsn/internal/service_app.h @@ -0,0 +1,77 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated 
documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# pragma once + +# include +# include +# include +# include +# include + +namespace dsn { +class service_node; +namespace service { + +class service_app +{ +public: + template static service_app* create(service_app_spec* s) + { + return new T(s); + } + + +public: + service_app(service_app_spec* s); + virtual ~service_app(void); + + virtual error_code start(int argc, char** argv) = 0; + + virtual void stop(bool cleanup = false) = 0; + + const service_app_spec& spec() const { return _spec; } + const std::string& name() const { return _spec.name; } + int arg_count() const { return static_cast(_args.size()); } + char** args() const { return (char**)&_args_ptr[0]; } + service_node* svc_node() const { return _svc_node; } + int id() const { return spec().id; } + +private: + friend class system_runner; + void set_service_node(service_node* node) { _svc_node = node; } + +private: + std::vector _args; + std::vector _args_ptr; + service_node* _svc_node; + service_app_spec _spec; +}; + +typedef service_app* 
(*service_app_factory)(service_app_spec*); + +}} // end namespace dsn::service_api + + diff --git a/include/dsn/internal/servicelet.h b/include/dsn/internal/servicelet.h new file mode 100644 index 0000000000..8babb7c821 --- /dev/null +++ b/include/dsn/internal/servicelet.h @@ -0,0 +1,104 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +# pragma once + +# include +# include +# include +# include +# include + +namespace dsn { + typedef std::function task_handler; + typedef std::function aio_handler; + typedef std::function rpc_reply_handler; + + namespace service { + + // + // servicelet is the base class for RPC service and client + // there can be multiple servicelet in the system, mostly + // defined during initialization in main + // + class servicelet + { + public: + servicelet(); + virtual ~servicelet(); + + static end_point primary_address() { return rpc::primary_address(); } + static uint32_t random32(uint32_t min, uint32_t max) { return env::random32(min, max); } + static uint64_t random64(uint64_t min, uint64_t max) { return env::random64(min, max); } + static uint64_t now_ns() { return env::now_ns(); } + static uint64_t now_us() { return env::now_us(); } + static uint64_t now_ms() { return env::now_ms(); } + + protected: + friend class service_context_manager; + + int add_outstanding_task(task* tsk); + void remove_outstanding_task(int id); + void clear_outstanding_tasks(); + void check_hashed_access(); + + private: + int _last_id; + std::map _outstanding_tasks; + std::mutex _outstanding_tasks_lock; + + std::set _events; + std::thread::id _access_thread_id; + bool _access_thread_id_inited; + }; + + class service_context_manager + { + public: + service_context_manager(servicelet* owner, task* task) + { + _owner = owner; + if (nullptr != _owner) + { + _id = owner->add_outstanding_task(task); + } + } + + ~service_context_manager() + { + if (nullptr != _owner) + { + _owner->remove_outstanding_task(_id); + } + } + + void clear_context() { _owner = nullptr; } + + private: + int _id; + servicelet *_owner; + }; + } +} diff --git a/include/dsn/internal/singleton.h b/include/dsn/internal/singleton.h new file mode 100644 index 0000000000..4e7f65427e --- /dev/null +++ b/include/dsn/internal/singleton.h @@ -0,0 +1,66 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft 
Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
namespace dsn { namespace utils {

// Lazily constructed, never destroyed, process-wide instance of T.
template <typename T>
class singleton
{
public:
    singleton() {}

    // Returns the unique instance, creating it on first use.
    //
    // std::call_once is invoked unconditionally: the original tested the
    // plain pointer _instance before calling it, and that unsynchronised read
    // races with the write performed inside call_once by another thread.
    // Going through call_once every time gives each caller a happens-before
    // edge with the construction.
    static T& instance()
    {
        static std::once_flag flag;
        std::call_once(flag, []() { _instance = new T(); });
        return *_instance;
    }

    // True once instance() has created the object.
    // NOTE(review): this is still an unsynchronised read of _instance; it is
    // only reliable when ordered after instance() by some other means.
    static bool is_instance_created()
    {
        return nullptr != _instance;
    }

protected:
    static T* _instance;

private:
    // non-copyable (declared, never defined)
    singleton(const singleton&);
    singleton& operator=(const singleton&);
};

// ----- inline implementations -------------------------------------------------------------------

template <typename T> T* singleton<T>::_instance = 0;

}} // end namespace dsn::utils
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# pragma once + +# include +# include +# include +# include + +namespace dsn { namespace utils { + +template> +class singleton_store : public dsn::utils::singleton> +{ +public: + bool put(TKey key, TValue val) + { + auto_write_lock l(_lock); + auto it = _store.find(key); + if (it != _store.end()) + return false; + else + { + _store.insert(std::make_pair(key, val)); + return true; + } + } + + bool get(TKey key, __out_param TValue& val) const + { + auto_read_lock l(_lock); + auto it = _store.find(key); + if (it != _store.end()) + { + val = it->second; + return true; + } + else + return false; + } + + bool remove(TKey key) + { + auto_write_lock l(_lock); + return _store.erase(key) > 0; + } + + void get_all_keys(__out_param std::vector& keys) + { + auto_read_lock l(_lock); + for (auto it = _store.begin(); it != _store.end(); it++) + { + keys.push_back(it->first); + } + } + +private: + std::map _store; + mutable rw_lock _lock; +}; + +//------------- inline implementation ---------- + +}} // end namespace dsn::utils diff --git a/include/dsn/internal/singleton_vector_store.h b/include/dsn/internal/singleton_vector_store.h new file mode 100644 index 0000000000..e76459bfb8 --- /dev/null +++ b/include/dsn/internal/singleton_vector_store.h @@ -0,0 +1,85 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, 
and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# pragma once + +# include +# include + +namespace dsn { namespace utils { + +template +class singleton_vector_store : public dsn::utils::singleton> +{ +public: + singleton_vector_store(void){} + ~singleton_vector_store(void){} + + bool contains(int index) const + { + if (index >= static_cast(_contains.size())) + return false; + else + return _contains[index]; + } + + T get(int index) const + { + if (index >= static_cast(_contains.size())) + return default_value; + else + return _values[index]; + } + + bool put(int index, T value) + { + if (index >= static_cast(_contains.size())) + { + for (int i = static_cast(_contains.size()); i < index; i++) + { + _contains.push_back(false); + _values.push_back(default_value); + } + + _contains.push_back(true); + _values.push_back(value); + return true; + } + else if (_contains[index]) + return false; + else + { + _contains[index] = true; + _values[index] = value; + return true; + } + } + +private: + std::vector _contains; + std::vector _values; +}; + +}} // end namespace dsn::utils diff --git a/include/dsn/internal/synchronize.h b/include/dsn/internal/synchronize.h new file mode 100644 index 0000000000..8c709f72e6 --- /dev/null +++ 
b/include/dsn/internal/synchronize.h @@ -0,0 +1,181 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +# pragma once + +# include +# include +# include +# include +# include + +namespace dsn { namespace utils { + +class rw_lock +{ +public: + rw_lock() { } + ~rw_lock() {} + + void lock_read() { _lock.lock_shared(); } + bool try_lock_read() { return _lock.try_lock_shared(); } + void unlock_read() { _lock.unlock_shared(); } + + void lock_write() { _lock.lock(); } + bool try_lock_write() { return _lock.try_lock(); } + void unlock_write() { _lock.unlock(); } + +private: + boost::shared_mutex _lock; +}; + +class notify_event +{ +public: + notify_event() : _ready(false){} + void notify() { std::lock_guard l(_lk); _ready = true; _cond.notify_all(); } + void wait() { std::unique_lock l(_lk); _cond.wait(l, [&]{return _ready; }); } + bool wait_for(int milliseconds) + { + std::unique_lock l(_lk); + if (_ready) + { + return true; + } + else + { + return std::cv_status::no_timeout == _cond.wait_for(l, std::chrono::milliseconds(milliseconds)); + } + } + +private: + std::mutex _lk; + std::condition_variable _cond; + bool _ready; +}; + + +class semaphore +{ +public: + semaphore(int initialCount = 0) + : _count(initialCount), _waits(0) + { + } + + ~semaphore() + { + } + +public: + inline void signal() + { + signal(1); + } + + inline void signal(int count) + { + std::unique_lock l(_lk); + _count += count; + if (_waits > 0) + { + _cond.notify_one(); + } + } + + inline bool wait() + { + return wait(TIME_MS_MAX); + } + + inline bool wait(unsigned int milliseconds) + { + std::unique_lock l(_lk); + if (_count == 0) + { + _waits++; + + auto r = _cond.wait_for(l, std::chrono::milliseconds(milliseconds)); + + _waits--; + if (r == std::cv_status::timeout) + return false; + } + + dassert (_count > 0, "semphoare must be greater than zero"); + _count--; + return true; + } + + inline bool release() + { + signal(1); + return true; + } + +private: + std::mutex _lk; + std::condition_variable _cond; + int _count; + int _waits; +}; + + +//--------------------- helpers 
-------------------------------------- + +class auto_lock +{ +public: + auto_lock(std::recursive_mutex & lock) : _lock(&lock) { _lock->lock(); } + ~auto_lock() { _lock->unlock(); } + +private: + std::recursive_mutex * _lock; + + auto_lock(const auto_lock&); + auto_lock& operator=(const auto_lock&); +}; + +class auto_read_lock +{ +public: + auto_read_lock(rw_lock & lock) : _lock(&lock) { _lock->lock_read(); } + ~auto_read_lock() { _lock->unlock_read(); } + +private: + rw_lock * _lock; +}; + +class auto_write_lock +{ +public: + auto_write_lock(rw_lock & lock) : _lock(&lock) { _lock->lock_write(); } + ~auto_write_lock() { _lock->unlock_write(); } + +private: + rw_lock * _lock; +}; + +}} // end namespace diff --git a/include/dsn/internal/task.h b/include/dsn/internal/task.h new file mode 100644 index 0000000000..0f79892dd2 --- /dev/null +++ b/include/dsn/internal/task.h @@ -0,0 +1,224 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# pragma once + +# include +# include +# include +# include +# include +# include + +namespace dsn { + +//----------------- common task ------------------------------------------------------- + +class task_worker; +class task_worker_pool; +class service_node; + +class task : public ref_object, public extensible_object +{ +public: + task(task_code code, int hash = 0, service_node* node = nullptr); + virtual ~task(); + + virtual void exec() = 0; + + void exec_internal(); + bool cancel(bool wait_until_finished); + bool wait(int timeout_milliseconds = TIME_MS_MAX); + virtual void enqueue(); + void set_error_code(error_code err) { _error = err; } + void set_delay(int delay_milliseconds = 0) { _delay_milliseconds = delay_milliseconds; } + + uint64_t id() const { return _task_id; } + task_state state() const { return _state.load(); } + task_code code() const { return _spec->code; } + task_spec& spec() const { return *_spec; } + int hash() const { return _hash; } + int delay_milliseconds() const { return _delay_milliseconds; } + error_code error() const { return _error; } + service_node* node() const { return _node; } + const char* node_name() const; + bool is_empty() const { return _is_null; } + + + static task* get_current_task(); + static uint64_t get_current_task_id(); + static task_worker* get_current_worker(); + static void set_current_worker(task_worker* worker); + +protected: + void signal_waiters(); + void enqueue(task_worker_pool* pool); + void set_task_id(uint64_t tid) { _task_id = tid; } + + mutable std::atomic _state; + bool _is_null; + +private: + uint64_t _task_id; + std::atomic _wait_event; + int _hash; + int _delay_milliseconds; + error_code _error; + bool _wait_for_cancel; + 
task_spec *_spec; + service_node *_node; +}; + +DEFINE_REF_OBJECT(task) + +//----------------- timer task ------------------------------------------------------- + +class timer_task : public task +{ +public: + timer_task(task_code code, uint32_t interval_milliseconds, int hash = 0); + void exec(); + + virtual bool on_timer() = 0; + +private: + uint32_t _interval_milliseconds; +}; + +//----------------- rpc task ------------------------------------------------------- + +class service_node; +class rpc_request_task : public task +{ +public: + rpc_request_task(message_ptr& request, service_node* node); + + message_ptr& get_request() { return _request; } + void enqueue(service_node* node); + + virtual void exec() = 0; + +protected: + message_ptr _request; +}; + +typedef ::boost::intrusive_ptr rpc_request_task_ptr; + +class rpc_server_handler +{ +public: + virtual rpc_request_task_ptr new_request_task(message_ptr& request, service_node* node) = 0; + virtual ~rpc_server_handler(){} +}; + +struct rpc_handler_info +{ + task_code code; + std::string name; + rpc_server_handler *handler; + + rpc_handler_info(task_code code) : code(code) {} + ~rpc_handler_info() { delete handler; } +}; + +typedef std::shared_ptr rpc_handler_ptr; + +class rpc_response_task : public task +{ +public: + rpc_response_task(message_ptr& request, int hash = 0); + + virtual void on_response(error_code err, message_ptr& request, message_ptr& response) = 0; + + void enqueue(error_code err, message_ptr& reply); + virtual void enqueue() { task::enqueue(_caller_pool); } // re-enqueue after above enqueue + message_ptr& get_request() { return _request; } + message_ptr& get_response() { return _response; } + + virtual void exec(); + +private: + message_ptr _request; + message_ptr _response; + task_worker_pool *_caller_pool; + + friend class rpc_engine; +}; + +class rpc_response_task_empty : public rpc_response_task +{ +public: + rpc_response_task_empty(message_ptr& request, int hash = 0); + + virtual void 
on_response(error_code err, message_ptr& request, message_ptr& response) {} +}; + +typedef ::boost::intrusive_ptr rpc_response_task_ptr; + +//------------------------- disk AIO task --------------------------------------------------- + +enum aio_type +{ + AIO_Read, + AIO_Write +}; + +class disk_engine; +class disk_aio +{ +public: + // filled by apps + handle_t file; + void* buffer; + uint32_t buffer_size; + uint64_t file_offset; + + // filled by frameworks + aio_type type; + disk_engine *engine; +}; + +typedef ::std::shared_ptr disk_aio_ptr; + +class aio_task : public task +{ +public: + aio_task(task_code code, int hash = 0); + + void enqueue(error_code err, uint32_t transferred_size, service_node* node); + uint32_t get_transferred_size() const { return _transferred_size; } + disk_aio_ptr aio() { return _aio; } + void exec(); + + virtual void on_completed(error_code err, uint32_t transferred_size) = 0; + +private: + disk_aio_ptr _aio; + uint32_t _transferred_size; +}; + +typedef ::boost::intrusive_ptr aio_task_ptr; + +} // end namespace diff --git a/include/dsn/internal/task_code.h b/include/dsn/internal/task_code.h new file mode 100644 index 0000000000..d9b43a743f --- /dev/null +++ b/include/dsn/internal/task_code.h @@ -0,0 +1,209 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# pragma once + +# include +# include +# include +# include +# include +# include +# include +# include + +namespace dsn { + +enum task_type +{ + TASK_TYPE_RPC_REQUEST, + TASK_TYPE_RPC_RESPONSE, + TASK_TYPE_COMPUTE, + TASK_TYPE_AIO, + TASK_TYPE_CONTINUATION, + TASK_TYPE_COUNT, + TASK_TYPE_INVALID, +}; + +ENUM_BEGIN(task_type, TASK_TYPE_INVALID) + ENUM_REG(TASK_TYPE_RPC_REQUEST) + ENUM_REG(TASK_TYPE_RPC_RESPONSE) + ENUM_REG(TASK_TYPE_COMPUTE) + ENUM_REG(TASK_TYPE_AIO) + ENUM_REG(TASK_TYPE_CONTINUATION) +ENUM_END(task_type) + +enum task_priority +{ + TASK_PRIORITY_LOW, + TASK_PRIORITY_COMMON, + TASK_PRIORITY_HIGH, + TASK_PRIORITY_COUNT, + TASK_PRIORITY_INVALID, +}; + +ENUM_BEGIN(task_priority, TASK_PRIORITY_INVALID) + ENUM_REG(TASK_PRIORITY_LOW) + ENUM_REG(TASK_PRIORITY_COMMON) + ENUM_REG(TASK_PRIORITY_HIGH) +ENUM_END(task_priority) + +enum task_state +{ + TASK_STATE_READY, + TASK_STATE_RUNNING, + TASK_STATE_FINISHED, + TASK_STATE_CANCELLED, + + TASK_STATE_COUNT, + TASK_STATE_INVALID +}; + +ENUM_BEGIN(task_state, TASK_STATE_INVALID) + ENUM_REG(TASK_STATE_READY) + ENUM_REG(TASK_STATE_RUNNING) + ENUM_REG(TASK_STATE_FINISHED) + ENUM_REG(TASK_STATE_CANCELLED) +ENUM_END(task_state) + +#define MAX_TASK_CODE_NAME_LENGTH 47 + +// define network header format for RPC +DEFINE_CUSTOMIZED_ID_TYPE(network_header_format); +DEFINE_CUSTOMIZED_ID(network_header_format, NET_HDR_DSN); + +// define network channel types for RPC +DEFINE_CUSTOMIZED_ID_TYPE(rpc_channel) 
+DEFINE_CUSTOMIZED_ID(rpc_channel, RPC_CHANNEL_TCP)
+DEFINE_CUSTOMIZED_ID(rpc_channel, RPC_CHANNEL_UDP)
+
+// Identifier for a kind of task, layered on dsn::utils::customized_id.
+// Copy construction is allowed; copy assignment is declared private and has
+// no definition in this header.
+// NOTE(review): the template arguments of customized_id (here) and of
+// join_point (in task_spec below) look like they were lost in extraction;
+// restore them from the upstream header.
+struct task_code : public dsn::utils::customized_id
+{
+    // Declared only; the definition lives outside this header.
+    task_code(const char* xxx, task_type type, threadpool_code pool, task_priority pri, int rpcPairedCode);
+
+    task_code(const task_code& source)
+        : dsn::utils::customized_id(source)
+    {
+    }
+
+    // Wraps an integer code; not explicit, so int converts implicitly.
+    task_code(int code) : dsn::utils::customized_id(code) {}
+
+    // Forwards both arguments to customized_id::from_string and wraps the
+    // returned id in a task_code.
+    static task_code from_string(const char* name, task_code invalid_value)
+    {
+        dsn::utils::customized_id id = dsn::utils::customized_id::from_string(name, invalid_value);
+        return task_code(id);
+    }
+
+private:
+    // no assignment operator
+    task_code& operator=(const task_code& source);
+};
+
+// task code with explicit name
+// Each macro defines a `__selectany const dsn::task_code` object named x whose
+// string name is #name; the trailing 0 is the paired RPC code (none).
+#define DEFINE_NAMED_TASK_CODE(x, name, priority, pool) __selectany const dsn::task_code x(#name, dsn::TASK_TYPE_COMPUTE, pool, priority, 0);
+#define DEFINE_NAMED_TASK_CODE_AIO(x, name, priority, pool) __selectany const dsn::task_code x(#name, dsn::TASK_TYPE_AIO, pool, priority, 0);
+
+// RPC between client and server, usually use different pools for server and client callbacks
+// Defines two codes: the response code x##_ACK first, then the request code x
+// that receives x##_ACK as its paired code.
+#define DEFINE_NAMED_TASK_CODE_RPC(x, name, priority, pool) \
+    __selectany const dsn::task_code x##_ACK(#name"_ACK", dsn::TASK_TYPE_RPC_RESPONSE, pool, priority, 0); \
+    __selectany const dsn::task_code x(#name, dsn::TASK_TYPE_RPC_REQUEST, pool, priority, x##_ACK);
+
+// Same pair as above, but with `static` (internal) linkage instead of __selectany.
+#define DEFINE_NAMED_TASK_CODE_RPC_PRIVATE(x, name, priority, pool) \
+    static const dsn::task_code x##_ACK(#name"_ACK", dsn::TASK_TYPE_RPC_RESPONSE, pool, priority, 0); \
+    static const dsn::task_code x(#name, dsn::TASK_TYPE_RPC_REQUEST, pool, priority, x##_ACK);
+
+// auto name version
+// The identifier x doubles as the string name.
+#define DEFINE_TASK_CODE(x, priority, pool) DEFINE_NAMED_TASK_CODE(x, x, priority, pool)
+#define DEFINE_TASK_CODE_AIO(x, priority, pool) DEFINE_NAMED_TASK_CODE_AIO(x, x, priority, pool)
+#define DEFINE_TASK_CODE_RPC(x, priority, pool) DEFINE_NAMED_TASK_CODE_RPC(x, x, priority, pool)
+#define DEFINE_TASK_CODE_RPC_PRIVATE(x, priority, pool) DEFINE_NAMED_TASK_CODE_RPC_PRIVATE(x, x, priority, pool)
+
+DEFINE_TASK_CODE(TASK_CODE_INVALID, TASK_PRIORITY_COMMON, THREAD_POOL_DEFAULT)
+
+class task;
+class aio_task;
+class rpc_request_task;
+class rpc_response_task;
+class message;
+class admission_controller;
+// Callback type taking the task and the admission controller involved.
+typedef void (*task_rejection_handler)(task*, admission_controller*);
+
+// Per-task-code settings record: plain public fields plus one join_point
+// member per hook, grouped below by task type. All member functions are
+// declared only; their definitions live outside this header.
+class task_spec : public extensible_object
+{
+public:
+    static task_spec* get(int ec);
+
+public:
+    task_code              code;
+    task_type              type;
+    const char*            name;
+    task_code              rpc_paired_code;
+    task_priority          priority;
+    threadpool_code        pool_code;
+    bool                   allow_inline; // allow task executed in other thread pools or tasks
+    bool                   fast_execution_in_network_thread;
+    network_header_format  rpc_call_header_format;
+
+    task_rejection_handler rejection_handler;
+    rpc_channel            rpc_call_channel;
+    int32_t                rpc_timeout_milliseconds;
+    int32_t                rpc_retry_interval_milliseconds;
+
+    // COMPUTE
+    join_point on_task_enqueue;
+    join_point on_task_begin; // TODO: parent task
+    join_point on_task_end;
+    join_point on_task_cancelled;
+
+    join_point on_task_wait_pre;
+    join_point on_task_wait_post; // wait succeeded or timed out
+    join_point on_task_cancel_post; // cancel succeeded or not
+
+
+    // AIO
+    join_point on_aio_call; // return true means continue, otherwise early terminate with task::set_error_code
+    join_point on_aio_enqueue; // aio done, enqueue callback
+
+    // RPC_REQUEST
+    join_point on_rpc_call; // return true means continue, otherwise dropped and (optionally) timed out
+    join_point on_rpc_request_enqueue;
+
+    // RPC_RESPONSE
+    join_point on_rpc_reply;
+    join_point on_rpc_response_enqueue; // response, task
+
+    // message data flow
+    join_point on_create_response;
+
+public:
+    task_spec(int code, const char* name, task_type type, threadpool_code pool, int paired_code, task_priority pri);
+
+public:
+    static bool init(configuration_ptr config);
+    void init_profiling(bool profile);
+};
+
+} // end namespace
+
+
diff --git a/include/dsn/internal/task_queue.h b/include/dsn/internal/task_queue.h
new file mode 100644
index 0000000000..4670d78985
--- /dev/null
+++ b/include/dsn/internal/task_queue.h
@@ -0,0 +1,63 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015 Microsoft Corporation
+ *
+ * -=- Robust Distributed System Nucleus (rDSN) -=-
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */ +# pragma once + +# include + +namespace dsn { + +class task_worker_pool; +class admission_controller; + +class task_queue +{ +public: + template static task_queue* create(task_worker_pool* pool, int index, task_queue* inner_provider) + { + return new T(pool, index, inner_provider); + } + +public: + task_queue(task_worker_pool* pool, int index, task_queue* inner_provider); + + virtual void enqueue(task_ptr& task) = 0; + virtual task_ptr dequeue() = 0; + virtual int count() const = 0; + + const std::string & get_name() { return _name; } + task_worker_pool* pool() const { return _pool; } + perf_counter_ptr& get_qps_counter() { return _qps_counter; } + admission_controller* controller() const { return _controller; } + void set_controller(admission_controller* controller) { _controller = controller; } + +private: + task_worker_pool* _pool; + std::string _name; + perf_counter_ptr _qps_counter; + admission_controller* _controller; +}; + +} // end namespace diff --git a/include/dsn/internal/task_worker.h b/include/dsn/internal/task_worker.h new file mode 100644 index 0000000000..968dba9fb1 --- /dev/null +++ b/include/dsn/internal/task_worker.h @@ -0,0 +1,83 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# pragma once + +# include +# include +# include +# include + +namespace dsn { + +class task_worker : public extensible_object +{ +public: + template static task_worker* create(task_worker_pool* pool, task_queue* q, int index, task_worker* inner_provider) + { + return new T(pool, q, index, inner_provider); + } + +public: + task_worker(task_worker_pool* pool, task_queue* q, int index, task_worker* inner_provider); + virtual ~task_worker(void); + + // service management + void start(); + void stop(); + + virtual void loop(); // run tasks from _input_queueu + + // inquery + const std::string& name() const { return _name; } + int index() const { return _index; } + task_worker_pool* pool() const { return _owner_pool; } + task_queue* queue() const { return _input_queue; } + const threadpool_spec& pool_spec() const; + static task_worker* current(); + +private: + task_worker_pool* _owner_pool; + task_queue* _input_queue; + int _index; + std::string _name; + std::thread *_thread; + bool _is_running; + utils::notify_event _started; + +private: + void set_name(); + void set_priority(worker_priority_t pri); + void set_affinity(uint64_t affinity); + void run_internal(); + +public: + static join_point on_start; + static join_point on_create; +}; + +} // end namespace + + diff --git a/include/dsn/internal/threadpool_code.h b/include/dsn/internal/threadpool_code.h new file mode 100644 index 0000000000..afdd24a125 --- /dev/null +++ b/include/dsn/internal/threadpool_code.h @@ -0,0 +1,87 @@ +/* 
+ * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +# pragma once + +# include +# include +# include +# include + +namespace dsn { + +enum worker_priority_t +{ + THREAD_xPRIORITY_LOWEST, + THREAD_xPRIORITY_BELOW_NORMAL, + THREAD_xPRIORITY_NORMAL, + THREAD_xPRIORITY_ABOVE_NORMAL, + THREAD_xPRIORITY_HIGHEST, + THREAD_xPRIORITY_COUNT, + THREAD_xPRIORITY_INVALID, +}; + +ENUM_BEGIN(worker_priority_t, THREAD_xPRIORITY_INVALID) + ENUM_REG(THREAD_xPRIORITY_LOWEST) + ENUM_REG(THREAD_xPRIORITY_BELOW_NORMAL) + ENUM_REG(THREAD_xPRIORITY_NORMAL) + ENUM_REG(THREAD_xPRIORITY_ABOVE_NORMAL) + ENUM_REG(THREAD_xPRIORITY_HIGHEST) +ENUM_END(worker_priority_t) + +DEFINE_CUSTOMIZED_ID_TYPE(threadpool_code) + +#define DEFINE_THREAD_POOL_CODE(x) DEFINE_CUSTOMIZED_ID(dsn::threadpool_code, x) + +DEFINE_THREAD_POOL_CODE(THREAD_POOL_INVALID) +DEFINE_THREAD_POOL_CODE(THREAD_POOL_DEFAULT) + +struct threadpool_spec +{ + std::string name; + threadpool_code pool_code; + bool run; + int worker_count; + worker_priority_t worker_priority; + bool worker_share_core; + uint64_t worker_affinity_mask; + unsigned int max_input_queue_length; // 0xFFFFFFFFUL by default + bool partitioned; // false by default + std::string queue_factory_name; + std::string worker_factory_name; + std::list queue_aspects; + std::list worker_aspects; + std::string admission_controller_factory_name; + std::string admission_controller_arguments; + + threadpool_spec(const threadpool_code& code) : pool_code(code), name(code.to_string()) {} + threadpool_spec(const char* name) : pool_code(name), name(name) {} + threadpool_spec(const threadpool_spec& source); + threadpool_spec& operator=(const threadpool_spec& source); + + static bool init(configuration_ptr& config, __out_param std::vector& specs); +}; + +} // end namespace diff --git a/include/dsn/internal/utils.h b/include/dsn/internal/utils.h new file mode 100644 index 0000000000..987af5582a --- /dev/null +++ b/include/dsn/internal/utils.h @@ -0,0 +1,256 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft 
Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +# pragma once + +# include +# include + +namespace dsn { + + class blob + { + public: + blob() { _buffer = _data = 0; _length = 0; } + + blob(std::shared_ptr& buffer, int length) + : _holder(buffer), _buffer(buffer.get()), _data(buffer.get()), _length(length) + {} + + blob(std::shared_ptr& buffer, int offset, int length) + : _holder(buffer), _buffer(buffer.get()), _data(buffer.get() + offset), _length(length) + {} + + blob(const char* buffer, int offset, int length) + : _buffer(buffer), _data(buffer + offset), _length(length) + {} + + blob(const blob& source) + : _holder(source._holder), _buffer(source._buffer), _data(source._data), _length(source._length) + {} + + void assign(std::shared_ptr& buffer, int offset, int length) + { + _holder = buffer; + _buffer = (buffer.get()); + _data = (buffer.get() + offset); + _length = (length); + } + + const char* data() const { return _data; } + + int length() const { return _length; } + + std::shared_ptr buffer() { return _holder; } + + blob range(int offset) const + { + dassert(offset <= _length, "offset cannot exceed the current length value"); + + blob temp = *this; + temp._data += offset; + temp._length -= offset; + return temp; + } + + blob range(int offset, int len) const + { + dassert(offset <= _length, "offset cannot exceed the current length value"); + + blob temp = *this; + temp._data += offset; + temp._length -= offset; + dassert(temp._length >= len, "buffer length must exceed the required length"); + temp._length = len; + return temp; + } + + bool operator == (const blob& r) const + { + dassert(false, "not implemented"); + return false; + } + + private: + friend class binary_writer; + std::shared_ptr _holder; + const char* _buffer; + const char* _data; + int _length; // data length + }; + + class binary_reader + { + public: + binary_reader(blob& blob); + + template int read_pod(__out_param T& val); + template int read(__out_param T& val) { dassert(false, "read of this type is not implemented"); return 0; } + 
int read(__out_param int8_t& val) { return read_pod(val); } + int read(__out_param uint8_t& val) { return read_pod(val); } + int read(__out_param int16_t& val) { return read_pod(val); } + int read(__out_param uint16_t& val) { return read_pod(val); } + int read(__out_param int32_t& val) { return read_pod(val); } + int read(__out_param uint32_t& val) { return read_pod(val); } + int read(__out_param int64_t& val) { return read_pod(val); } + int read(__out_param uint64_t& val) { return read_pod(val); } + int read(__out_param bool& val) { return read_pod(val); } + + int read(__out_param std::string& s); + int read(char* buffer, int sz); + int read(blob& blob); + + bool next(const void** data, int* size); + bool skip(int count); + bool backup(int count); + + blob get_buffer() const { return _blob; } + blob get_remaining_buffer() const { return _blob.range(static_cast(_ptr - _blob.data())); } + bool is_eof() const { return _ptr >= _blob.data() + _size; } + int total_size() const { return _size; } + int get_remaining_size() const { return _remaining_size; } + + private: + blob _blob; + int _size; + const char* _ptr; + int _remaining_size; + }; + + class binary_writer + { + public: + binary_writer(int reservedBufferSize = 0); + binary_writer(blob& buffer); + ~binary_writer(); + + uint16_t write_placeholder(); + template void write_pod(const T& val, uint16_t pos = 0xffff); + template void write(const T& val, uint16_t pos = 0xffff) { dassert(false, "write of this type is not implemented"); } + void write(const int8_t& val, uint16_t pos = 0xffff) { write_pod(val, pos); } + void write(const uint8_t& val, uint16_t pos = 0xffff) { write_pod(val, pos); } + void write(const int16_t& val, uint16_t pos = 0xffff) { write_pod(val, pos); } + void write(const uint16_t& val, uint16_t pos = 0xffff) { write_pod(val, pos); } + void write(const int32_t& val, uint16_t pos = 0xffff) { write_pod(val, pos); } + void write(const uint32_t& val, uint16_t pos = 0xffff) { write_pod(val, pos); } + void 
write(const int64_t& val, uint16_t pos = 0xffff) { write_pod(val, pos); } + void write(const uint64_t& val, uint16_t pos = 0xffff) { write_pod(val, pos); } + void write(const bool& val, uint16_t pos = 0xffff) { write_pod(val, pos); } + + void write(const std::string& val, uint16_t pos = 0xffff); + void write(const char* buffer, int sz, uint16_t pos = 0xffff); + void write(const blob& val, uint16_t pos = 0xffff); + + bool next(void** data, int* size); + bool backup(int count); + + void get_buffers(__out_param std::vector& buffers) const; + int get_buffer_count() const { return static_cast(_buffers.size()); } + blob get_buffer() const; + blob get_first_buffer() const; + + int total_size() const { return _total_size; } + + private: + void create_buffer_and_writer(blob* pBuffer = nullptr); + + private: + std::vector _buffers; + std::vector _data; + bool _cur_is_placeholder; + int _cur_pos; + int _total_size; + int _reserved_size_per_buffer; + static int _reserved_size_per_buffer_static; + }; + + //--------------- inline implementation ------------------- + template + inline int binary_reader::read_pod(__out_param T& val) + { + if (sizeof(T) <= get_remaining_size()) + { + memcpy((void*)&val, _ptr, sizeof(T)); + _ptr += sizeof(T); + _remaining_size -= sizeof(T); + return static_cast(sizeof(T)); + } + else + { + dlog(::dsn::logging_level::log_level_WARNING, "dsn.utils", "read beyond the end of buffer"); + return 0; + } + } + + template + inline void binary_writer::write_pod(const T& val, uint16_t pos) + { + write((char*)&val, static_cast(sizeof(T)), pos); + } + + inline void binary_writer::get_buffers(__out_param std::vector& buffers) const + { + buffers = _data; + } + + inline blob binary_writer::get_first_buffer() const + { + return _data[0]; + } + + inline void binary_writer::write(const std::string& val, uint16_t pos /*= 0xffff*/) + { + int len = static_cast(val.length()); + write((const char*)&len, sizeof(int), pos); + if (len > 0) write((const char*)&val[0], len, 
pos); + } + + inline void binary_writer::write(const blob& val, uint16_t pos /*= 0xffff*/) + { + // TODO: optimization by not memcpy + int len = val.length(); + write((const char*)&len, sizeof(int), pos); + if (len > 0) write((const char*)val.data(), len, pos); + } +} + +namespace dsn { + namespace utils { + + extern void split_args(const char* args, __out_param std::vector& sargs, char splitter = ' '); + extern void split_args(const char* args, __out_param std::list& sargs, char splitter = ' '); + + extern char* trim_string(char* s); + + extern uint64_t get_random64(); + + extern uint64_t get_random64_pseudo(); + + extern uint64_t get_current_physical_time_ns(); + + extern void time_ms_to_string(uint64_t ts_ms, char* str); + } +} // end namespace dsn::utils + diff --git a/include/dsn/internal/zlock_provider.h b/include/dsn/internal/zlock_provider.h new file mode 100644 index 0000000000..e75a070502 --- /dev/null +++ b/include/dsn/internal/zlock_provider.h @@ -0,0 +1,108 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#pragma once + +# include + +namespace dsn { namespace service { +class zlock; +class zrwlock; +class zsemaphore; +}} + +namespace dsn { + +class lock_provider : public extensible_object +{ +public: + template static lock_provider* create(dsn::service::zlock *lock, lock_provider* inner_provider) + { + return new T(lock, inner_provider); + } + +public: + lock_provider(dsn::service::zlock *lock, lock_provider* inner_provider) { _inner_provider = inner_provider; } + virtual ~lock_provider() { if (nullptr != _inner_provider) delete _inner_provider; } + + virtual void lock() = 0; + virtual bool try_lock() = 0; + virtual void unlock() = 0; + + lock_provider* get_inner_provider() const { return _inner_provider; } + +private: + lock_provider *_inner_provider; +}; + +class rwlock_provider : public extensible_object +{ +public: + template static rwlock_provider* create(dsn::service::zrwlock *lock, rwlock_provider* inner_provider) + { + return new T(lock, inner_provider); + } + +public: + rwlock_provider(dsn::service::zrwlock *lock, rwlock_provider* inner_provider) { _inner_provider = inner_provider; } + virtual ~rwlock_provider() { if (nullptr != _inner_provider) delete _inner_provider; } + + virtual void lock_read() = 0; + virtual bool try_lock_read() = 0; + virtual void unlock_read() = 0; + + virtual void lock_write() = 0; + virtual bool try_lock_write() = 0; + virtual void unlock_write() = 0; + + rwlock_provider* get_inner_provider() const { return _inner_provider; } + +private: + rwlock_provider *_inner_provider; +}; + +class semaphore_provider : public extensible_object +{ +public: + template static semaphore_provider* create(dsn::service::zsemaphore *sema, int initCount, semaphore_provider* 
inner_provider) + { + return new T(sema, initCount, inner_provider); + } + +public: + semaphore_provider(dsn::service::zsemaphore *sema, int initialCount, semaphore_provider* inner_provider) { _inner_provider = inner_provider; } + virtual ~semaphore_provider() { if (nullptr != _inner_provider) delete _inner_provider; } + +public: + virtual void signal(int count) = 0; + virtual bool wait(int timeout_milliseconds = TIME_MS_MAX) = 0; + + semaphore_provider* get_inner_provider() const { return _inner_provider; } + +private: + semaphore_provider *_inner_provider; +}; + +} // end namespace diff --git a/include/dsn/internal/zlocks.h b/include/dsn/internal/zlocks.h new file mode 100644 index 0000000000..8a1e4276d5 --- /dev/null +++ b/include/dsn/internal/zlocks.h @@ -0,0 +1,155 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#pragma once + +#include +#include + +namespace dsn { namespace service { + +namespace lock_checker { + extern __thread int zlock_exclusive_count; + extern __thread int zlock_shared_count; + extern void check_wait_safety(); + extern void check_dangling_lock(); + extern void check_wait_task(task* waitee); +} + +class zlock +{ +public: + zlock(); + ~zlock(); + + void lock() { _provider->lock(); lock_checker::zlock_exclusive_count++; } + bool try_lock() { auto r = _provider->try_lock(); if (r) lock_checker::zlock_exclusive_count++; return r; } + void unlock() { lock_checker::zlock_exclusive_count--; _provider->unlock(); } + +private: + dsn::lock_provider *_provider; + +private: + // no assignment operator + zlock& operator=(const zlock& source); + zlock(const zlock& source); +}; + +class zrwlock +{ +public: + zrwlock(); + ~zrwlock(); + + void lock_read() { _provider->lock_read(); lock_checker::zlock_shared_count++; } + bool try_lock_read() { auto r = _provider->try_lock_read(); if (r) lock_checker::zlock_shared_count++; return r; } + void unlock_read() { lock_checker::zlock_shared_count--; _provider->unlock_read(); } + + void lock_write() { _provider->lock_write(); lock_checker::zlock_exclusive_count++; } + bool try_lock_write() { auto r = _provider->try_lock_write(); if (r) lock_checker::zlock_exclusive_count++; return r; } + void unlock_write() { lock_checker::zlock_exclusive_count--; _provider->unlock_write(); } + +private: + dsn::rwlock_provider *_provider; + +private: + // no assignment operator + zrwlock& operator=(const zrwlock& source); + zrwlock(const zrwlock& source); +}; + +class zsemaphore +{ +public: + zsemaphore(int initialCount = 0); + ~zsemaphore(); + +public: + virtual void 
signal(int count = 1) { _provider->signal(count); } + + virtual bool wait(int timeout_milliseconds = TIME_MS_MAX) { lock_checker::check_wait_safety(); return _provider->wait(timeout_milliseconds); } + +private: + dsn::semaphore_provider *_provider; + +private: + // no assignment operator + zsemaphore& operator=(const zsemaphore& source); + zsemaphore(const zsemaphore& source); +}; + +class zevent +{ +public: + zevent(bool manualReset, bool initState = false); + ~zevent(); + +public: + void set(); + void reset(); + bool wait(int timeout_milliseconds = TIME_MS_MAX); + +private: + zsemaphore _sema; + std::atomic _signaled; + bool _manualReset; + +private: + // no assignment operator + zevent& operator=(const zevent& source); + zevent(const zevent& source); +}; + +class zauto_lock +{ +public: + zauto_lock (zlock & lock) : _lock(&lock) { _lock->lock(); } + ~zauto_lock() { _lock->unlock(); } + +private: + zlock * _lock; +}; + +class zauto_read_lock +{ +public: + zauto_read_lock (zrwlock & lock) : _lock(&lock) { _lock->lock_read(); } + ~zauto_read_lock() { _lock->unlock_read(); } + +private: + zrwlock * _lock; +}; + +class zauto_write_lock +{ +public: + zauto_write_lock (zrwlock & lock) : _lock(&lock) { _lock->lock_write(); } + ~zauto_write_lock() { _lock->unlock_write(); } + +private: + zrwlock * _lock; +}; + +}} // end namespace dsn::service diff --git a/include/dsn/serverlet.h b/include/dsn/serverlet.h new file mode 100644 index 0000000000..e06c8b9ae9 --- /dev/null +++ b/include/dsn/serverlet.h @@ -0,0 +1,440 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * 
copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# pragma once + +# include + +namespace dsn { + namespace service { + + // + // for TRequest/TResponse, we assume that the following routines are defined: + // marshall(binary_writer& writer, const T& val); + // unmarshall(binary_reader& reader, __out_param T& val); + // either in the namespace of ::dsn or T + // developers may write these helper functions by their own, or use tools + // such as protocol-buffer, thrift, or bond to generate these functions automatically + // for their TRequest and TResponse + // + + template + class rpc_replier + { + public: + rpc_replier(message_ptr& request) + { + _request = request; + _response = request->create_response(); + } + + rpc_replier(message_ptr& request, message_ptr& response) + { + _request = request; + _response = response; + } + + rpc_replier(const rpc_replier& r) + { + _request = r._request; + _response = r._response; + } + + void operator () (const TResponse& resp) + { + if (_response != nullptr) + { + marshall(_response->writer(), resp); + rpc::reply(_response); + } + } + + template + void continue_next( + const TResponse& local_response, + T2* next_service, + void (T2::*handler)(const T2Request&, rpc_replier&) + ); + + template + void continue_next_async( + 
const TResponse& local_response, + task_code code, + T2* next_service, + void (T2::*handler)(const T2Request&, rpc_replier&), + int hash = 0, + int delay_milliseconds = 0 + ); + + private: + message_ptr _request; + message_ptr _response; + }; + + template // where T : serverlet + class serverlet : public virtual servicelet + { + public: + serverlet(const char* nm); + ~serverlet(); + + protected: + template + bool register_rpc_handler(task_code rpc_code, const char* rpc_name_, void (T::*handler)(const TRequest&)); + + template + bool register_rpc_handler(task_code rpc_code, const char* rpc_name_, void (T::*handler)(const TRequest&, TResponse&)); + + template + bool register_async_rpc_handler(task_code rpc_code, const char* rpc_name_, void (T::*handler)(const TRequest&, rpc_replier&)); + + bool register_rpc_handler(task_code rpc_code, const char* rpc_name_, void (T::*handler)(message_ptr&)); + + bool unregister_rpc_handler(task_code rpc_code); + + template + void reply(message_ptr request, const TResponse& resp); + + public: + const std::string& name() const { return _name; } + + private: + std::string _name; + + private: + // type 1 -------------------------- + template + class service_rpc_request_task1 : public rpc_request_task, public service_context_manager + { + public: + service_rpc_request_task1(message_ptr& request, service_node* node, T* svc, void (T::*handler)(const TRequest&)) + : rpc_request_task(request, node), service_context_manager(svc, this) + { + _handler = handler; + _svc = svc; + } + + void exec() + { + TRequest req; + unmarshall(_request->reader(), req); + (_svc->*_handler)(req); + } + + private: + void (T::*_handler)(const TRequest&); + T* _svc; + }; + + template + class service_rpc_server_handler1 : public rpc_server_handler + { + public: + service_rpc_server_handler1(T* svc, void (T::*handler)(const TRequest&)) + { + _handler = handler; + _svc = svc; + } + + virtual rpc_request_task_ptr new_request_task(message_ptr& request, service_node* 
node) + { + return new service_rpc_request_task1(request, node, _svc, _handler); + } + + private: + void (T::*_handler)(const TRequest&); + T* _svc; + }; + + // type 2 --------------------------- + template + class service_rpc_request_task2 : public rpc_request_task, public service_context_manager + { + public: + service_rpc_request_task2(message_ptr& request, service_node* node, T* svc, void (T::*handler)(const TRequest&, TResponse&)) + : rpc_request_task(request, node), service_context_manager(svc, this) + { + _handler = handler; + _svc = svc; + } + + void exec() + { + TRequest req; + unmarshall(_request->reader(), req); + + TResponse resp; + (_svc->*_handler)(req, resp); + + rpc_replier replier(_request); + replier(resp); + } + + private: + void (T::*_handler)(const TRequest&, TResponse&); + T* _svc; + }; + + template + class service_rpc_server_handler2 : public rpc_server_handler + { + public: + service_rpc_server_handler2(T* svc, void (T::*handler)(const TRequest&, TResponse&)) + { + _handler = handler; + _svc = svc; + } + + virtual rpc_request_task_ptr new_request_task(message_ptr& request, service_node* node) + { + return new service_rpc_request_task2(request, node, _svc, _handler); + } + + private: + void (T::*_handler)(const TRequest&, TResponse&); + T* _svc; + }; + + // type 3 ----------------------------------- + template + class service_rpc_request_task3 : public rpc_request_task, public service_context_manager + { + public: + service_rpc_request_task3(message_ptr& request, service_node* node, T* svc, void (T::*handler)(const TRequest&, rpc_replier&)) + : rpc_request_task(request, node), service_context_manager(svc, this) + { + _handler = handler; + _svc = svc; + } + + void exec() + { + TRequest req; + unmarshall(_request->reader(), req); + + rpc_replier replier(_request); + (_svc->*_handler)(req, replier); + } + + private: + void (T::*_handler)(const TRequest&, rpc_replier&); + T* _svc; + }; + + template + class service_rpc_server_handler3 : public 
rpc_server_handler + { + public: + service_rpc_server_handler3(T* svc, void (T::*handler)(const TRequest&, rpc_replier&)) + { + _handler = handler; + _svc = svc; + } + + virtual rpc_request_task_ptr new_request_task(message_ptr& request, service_node* node) + { + return new service_rpc_request_task3(request, node, _svc, _handler); + } + + private: + void (T::*_handler)(const TRequest&, rpc_replier&); + T* _svc; + }; + + // type 4 ------------------------------------------ + class service_rpc_request_task4 : public rpc_request_task, public service_context_manager + { + public: + service_rpc_request_task4(message_ptr& request, service_node* node, T* svc, void (T::*handler)(message_ptr&)) + : rpc_request_task(request, node), service_context_manager(svc, this) + { + _handler = handler; + _svc = svc; + } + + void exec() + { + (_svc->*_handler)(_request); + } + + private: + void (T::*_handler)(message_ptr&); + T* _svc; + }; + + class service_rpc_server_handler4 : public rpc_server_handler + { + public: + service_rpc_server_handler4(T* svc, void (T::*handler)(message_ptr&)) + { + _handler = handler; + _svc = svc; + } + + virtual rpc_request_task_ptr new_request_task(message_ptr& request, service_node* node) + { + return new service_rpc_request_task4(request, node, _svc, _handler); + } + + private: + void (T::*_handler)(message_ptr&); + T* _svc; + }; + }; + + // ------------- inline implementation ---------------- + template template + inline void rpc_replier::continue_next( + const TResponse& local_response, + T2* next_service, + void (T2::*handler)(const T2Request&, rpc_replier&) + ) + { + marshall(_response->writer(), local_response); + + T2Request req; + unmarshall(_request->reader(), req); + + rpc_replier reply(_request, _response); + (next_service->*handler)(req, reply); + } + + template + class service_rpc_request_continue_task : public task, public service_context_manager + { + public: + service_rpc_request_continue_task( + message_ptr& request, + message_ptr& 
response, + task_code code, + T* svc, + void (T::*handler)(const TRequest&, rpc_replier&), + int hash = 0 + ) + : task(code, hash), service_context_manager(svc, this) + { + _handler = handler; + _svc = svc; + _request = request; + _response = response; + } + + void exec() + { + TRequest req; + unmarshall(_request->reader(), req); + + rpc_replier replier(_request, _response); + (_svc->*_handler)(req, replier); + } + + private: + void (T::*_handler)(const TRequest&, rpc_replier&); + T* _svc; + message_ptr _request; + message_ptr _response; + }; + + template template + inline void rpc_replier::continue_next_async( + const TResponse& local_response, + task_code code, + T2* next_service, + void (T2::*handler)(const T2Request&, rpc_replier&), + int hash, + int delay_milliseconds + ) + { + marshall(_response->writer(), local_response); + + task_ptr tsk(new service_rpc_request_continue_task( + _request, + _response, + code, + next_service, + handler, + hash + )); + + service::tasking::enqueue(tsk, delay_milliseconds); + } + + template + serverlet::serverlet(const char* nm) + : _name(nm) + { + } + + template + serverlet::~serverlet() + { + } + + template template + inline bool serverlet::register_rpc_handler(task_code rpc_code, const char* rpc_name_, void (T::*handler)(const TRequest&)) + { + return rpc::register_rpc_handler(rpc_code, rpc_name_, + new service_rpc_server_handler1(static_cast(this), handler)); + } + + template template + inline bool serverlet::register_rpc_handler(task_code rpc_code, const char* rpc_name_, void (T::*handler)(const TRequest&, TResponse&)) + { + return rpc::register_rpc_handler(rpc_code, rpc_name_, + new service_rpc_server_handler2(static_cast(this), handler)); + } + + template template + inline bool serverlet::register_async_rpc_handler(task_code rpc_code, const char* rpc_name_, void (T::*handler)(const TRequest&, rpc_replier&)) + { + return rpc::register_rpc_handler(rpc_code, rpc_name_, + new service_rpc_server_handler3(static_cast(this), 
handler)); + } + + template + inline bool serverlet::register_rpc_handler(task_code rpc_code, const char* rpc_name_, void (T::*handler)(message_ptr&)) + { + return rpc::register_rpc_handler(rpc_code, rpc_name_, + new service_rpc_server_handler4(static_cast(this), handler)); + } + + template + inline bool serverlet::unregister_rpc_handler(task_code rpc_code) + { + return rpc::unregister_rpc_handler(rpc_code); + } + + templatetemplate + inline void serverlet::reply(message_ptr request, const TResponse& resp) + { + auto msg = request->create_response(); + marshall(msg->writer(), resp); + rpc::reply(msg); + } + } // end namespace service +} // end namespace + + + diff --git a/include/dsn/service_api.h b/include/dsn/service_api.h new file mode 100644 index 0000000000..cfc0055c3d --- /dev/null +++ b/include/dsn/service_api.h @@ -0,0 +1,127 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# pragma once + +# include +# include +# include + +namespace dsn { namespace service { + +namespace tasking +{ + inline void enqueue(task_ptr& task, int delay_milliseconds = 0) + { + if (delay_milliseconds > 0) + { + task->set_delay(delay_milliseconds); + } + task->enqueue(); + } + + inline bool cancel(task_ptr& task, bool wait_until_finished) + { + return task->cancel(wait_until_finished); + } + + inline bool wait(task_ptr& task, int timeout_milliseconds = TIME_MS_MAX) + { + return task->wait(timeout_milliseconds); + } +} + +namespace rpc +{ + extern const end_point& primary_address(); + + extern bool register_rpc_handler(task_code code, const char* name, rpc_server_handler* handler); + + extern bool unregister_rpc_handler(task_code code); + + extern void reply(message_ptr& response); + + // when callback is empty, we assume callers will invoke return::wait() to perform a synchronous rpc call + // to invoke a one way rpc call, use call_one_way below + extern rpc_response_task_ptr call(const end_point& server, message_ptr& request, rpc_response_task_ptr callback = nullptr); + + extern void call_one_way(const end_point& server, message_ptr& request); +} + +namespace file +{ + extern handle_t open(const char* file_name, int flag, int pmode); + + extern void read(handle_t hFile, char* buffer, int count, uint64_t offset, aio_task_ptr& callback); + + extern void write(handle_t hFile, const char* buffer, int count, uint64_t offset, aio_task_ptr& callback); + + extern error_code close(handle_t hFile); + + extern void copy_remote_files( + const end_point& remote, + std::string& source_dir, + std::vector& files, // empty for all + std::string& dest_dir, + bool overwrite, + aio_task_ptr& callback + 
); +} + +namespace env +{ + // since Epoch (1970-01-01 00:00:00 +0000 (UTC)) + extern uint64_t now_ns(); + + // generate random number [min, max] + extern uint64_t random64(uint64_t min, uint64_t max); + + inline uint64_t now_us() { return now_ns() / 1000; } + inline uint64_t now_ms() { return now_ns() / 1000000; } + inline uint32_t random32(uint32_t min, uint32_t max) { return static_cast(random64(min, max)); } + inline double probability() { return static_cast(random32(0, 1000000000)) / 1000000000.0; } +} + +namespace system +{ + extern bool run(const char* config, bool sleep_after_init); + extern bool is_ready(); + + namespace internal_use_only + { + extern bool register_service(const char* name, service_app_factory factory); + } + + template bool register_service(const char* name) + { + return internal_use_only::register_service(name, service_app::create); + } + + extern configuration_ptr config(); + extern service_app* get_current_app(); + extern const std::map& get_all_apps(); +} + +}} // end namespace dsn::service diff --git a/include/dsn/thrift_helper.h b/include/dsn/thrift_helper.h new file mode 100644 index 0000000000..8509462c1c --- /dev/null +++ b/include/dsn/thrift_helper.h @@ -0,0 +1,411 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# pragma once + +# include +# include +# include +# include + +# include +# include +# include +# include +# include + +using namespace ::apache::thrift::transport; + +namespace dsn { + + class binary_reader_transport : public TVirtualTransport + { + public: + binary_reader_transport(binary_reader& reader) + : _reader(reader) + { + } + + bool isOpen() { return true; } + + void open() {} + + void close() {} + + uint32_t read(uint8_t* buf, uint32_t len) + { + int l = _reader.read((char*)buf, static_cast(len)); + if (l == 0) + { + throw TTransportException(TTransportException::END_OF_FILE, + "no more data to read after end-of-buffer"); + } + return (uint32_t)l; + } + + private: + binary_reader& _reader; + }; + + class binary_writer_transport : public TVirtualTransport + { + public: + binary_writer_transport(binary_writer& writer) + : _writer(writer) + { + } + + bool isOpen() { return true; } + + void open() {} + + void close() {} + + void write(const uint8_t* buf, uint32_t len) + { + _writer.write((const char*)buf, static_cast(len)); + } + + private: + binary_writer& _writer; + }; + + #define DEFINE_THRIFT_BASE_TYPE_SERIALIZATION(TName, TTag, TMethod) \ + inline int write_base(::apache::thrift::protocol::TProtocol* proto, const TName& val)\ + {\ + int xfer = proto->writeFieldBegin("val", ::apache::thrift::protocol::TType::T_##TTag, 0); \ + xfer += proto->write##TMethod(val); \ + xfer += proto->writeFieldEnd(); \ + return xfer;\ + }\ + inline int 
read_base(::apache::thrift::protocol::TProtocol* proto, __out_param TName& val, ::apache::thrift::protocol::TType ftype)\ + {\ + if (ftype == ::apache::thrift::protocol::TType::T_##TTag) return proto->read##TMethod(val); \ + else return proto->skip(ftype);\ + } + + DEFINE_THRIFT_BASE_TYPE_SERIALIZATION(bool, BOOL, Bool) + DEFINE_THRIFT_BASE_TYPE_SERIALIZATION(int8_t, I08, Byte) + DEFINE_THRIFT_BASE_TYPE_SERIALIZATION(int16_t, I16, I16) + DEFINE_THRIFT_BASE_TYPE_SERIALIZATION(int32_t, I32, I32) + DEFINE_THRIFT_BASE_TYPE_SERIALIZATION(int64_t, I64, I64) + DEFINE_THRIFT_BASE_TYPE_SERIALIZATION(double, DOUBLE, Double) + DEFINE_THRIFT_BASE_TYPE_SERIALIZATION(std::string, STRING, String) + + template + inline uint32_t marshall_base(::apache::thrift::protocol::TProtocol* oproto, const TName& val) + { + uint32_t xfer = 0; + xfer += oproto->writeStructBegin("val"); + xfer += write_base(oproto, val); + xfer += oproto->writeFieldStop(); + xfer += oproto->writeStructEnd(); + return xfer; + } + + template + inline uint32_t unmarshall_base(::apache::thrift::protocol::TProtocol* iproto, __out_param TName& val) + { + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iproto->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + while (true) + { + xfer += iproto->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + + switch (fid) + { + case 0: + xfer += read_base(iproto, val, ftype); + break; + default: + xfer += iproto->skip(ftype); + break; + } + + xfer += iproto->readFieldEnd(); + } + + xfer += iproto->readStructEnd(); + return xfer; + } + + template + void marshall(binary_writer& writer, const T& val) + { + boost::shared_ptr<::dsn::binary_writer_transport> transport(new ::dsn::binary_writer_transport(writer)); + ::apache::thrift::protocol::TBinaryProtocol proto(transport); + marshall_base(&proto, val); + } + + template + void 
unmarshall(binary_reader& reader, __out_param T& val) + { + boost::shared_ptr<::dsn::binary_reader_transport> transport(new ::dsn::binary_reader_transport(reader)); + ::apache::thrift::protocol::TBinaryProtocol proto(transport); + unmarshall_base(&proto, val); + } + + template + uint32_t marshall_rpc_args( + ::apache::thrift::protocol::TProtocol* oprot, + const T& val, + uint32_t(T::*writer)(::apache::thrift::protocol::TProtocol*) const + ) + { + uint32_t xfer = 0; + oprot->incrementRecursionDepth(); + xfer += oprot->writeStructBegin("rpc_message"); + + xfer += oprot->writeFieldBegin("msg", ::apache::thrift::protocol::T_STRUCT, 1); + + xfer += (val.*writer)(oprot); + + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + oprot->decrementRecursionDepth(); + return xfer; + } + + template + uint32_t unmarshall_rpc_args( + ::apache::thrift::protocol::TProtocol* iprot, + __out_param T& val, + uint32_t(T::*reader)(::apache::thrift::protocol::TProtocol*) + ) + { + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += (val.*reader)(iprot); + } + else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + iprot->readMessageEnd(); + iprot->getTransport()->readEnd(); + return xfer; + } + + DEFINE_CUSTOMIZED_ID(network_header_format, NET_HDR_THRIFT); + + class thrift_binary_message_parser : public message_parser + { + public: + static void register_parser() + { + ::dsn::tools::register_component_provider("thrift"); + } + + private: + 
// only one concurrent write message for each parser, so + char _write_buffer_for_header[512]; + + public: + thrift_binary_message_parser(int buffer_block_size) + : message_parser(buffer_block_size) + { + } + + virtual void get_output_buffers(message_ptr& msg, __out_param std::vector& buffers) + { + // prepare head + blob bb(_write_buffer_for_header, 0, 512); + binary_writer writer(bb); + boost::shared_ptr<::dsn::binary_writer_transport> transport(new ::dsn::binary_writer_transport(writer)); + ::apache::thrift::protocol::TBinaryProtocol proto(transport); + + auto sp = task_spec::get(msg->header().local_rpc_code); + + proto.writeMessageBegin(msg->header().rpc_name, + sp->type == TASK_TYPE_RPC_REQUEST ? + ::apache::thrift::protocol::T_CALL : + ::apache::thrift::protocol::T_REPLY, + (int32_t)msg->header().id + ); + + // patched end (writeMessageEnd) + // no need for now as no data is written + + // finalize + std::vector lbuffers; + msg->writer().get_buffers(lbuffers); + if (lbuffers[0].length() == message_header::serialized_size()) + { + lbuffers[0] = writer.get_buffer(); + buffers = lbuffers; + } + else + { + dassert(lbuffers[0].length() > message_header::serialized_size(), ""); + buffers.resize(lbuffers.size() + 1); + buffers[0] = writer.get_buffer(); + + for (int i = 0; i < static_cast(lbuffers.size()); i++) + { + if (i == 0) + { + buffers[1] = lbuffers[0].range(message_header::serialized_size()); + } + else + { + buffers[i + 1] = lbuffers[i]; + } + } + } + } + + virtual message_ptr on_read(int read_length, __out_param int& read_next) + { + mark_read(read_length); + + if (_read_buffer_occupied < 10) + { + read_next = 128; + return nullptr; + } + + try + { + blob bb = _read_buffer.range(0, _read_buffer_occupied); + binary_reader reader(bb); + boost::shared_ptr<::dsn::binary_reader_transport> transport(new ::dsn::binary_reader_transport(reader)); + ::apache::thrift::protocol::TBinaryProtocol proto(transport); + + int32_t rseqid = 0; + std::string fname; + 
::apache::thrift::protocol::TMessageType mtype; + + proto.readMessageBegin(fname, mtype, rseqid); + int hdr_sz = _read_buffer_occupied - reader.get_remaining_size(); + + if (mtype == ::apache::thrift::protocol::T_EXCEPTION) + { + proto.skip(::apache::thrift::protocol::T_STRUCT); + } + else + { + proto.skip(::apache::thrift::protocol::T_STRUCT); + } + + proto.readMessageEnd(); + proto.getTransport()->readEnd(); + + // msg done + int msg_sz = _read_buffer_occupied - reader.get_remaining_size() - hdr_sz; + auto msg_bb = _read_buffer.range(hdr_sz, msg_sz); + message_ptr msg = new message(msg_bb, false); + msg->header().id = msg->header().rpc_id = rseqid; + strcpy(msg->header().rpc_name, fname.c_str()); + msg->header().body_length = msg_sz; + + _read_buffer = _read_buffer.range(msg_sz + hdr_sz); + _read_buffer_occupied -= (msg_sz + hdr_sz); + read_next = 128; + return msg; + } + catch (TTransportException& ex) + { + ex; + return nullptr; + } + } + }; + +} + +/* + symbols defined in libthrift, putting here so we don't need to link :-) +*/ +namespace apache { + namespace thrift { + namespace transport { + inline const char* TTransportException::what() const throw() { + if (message_.empty()) { + switch (type_) { + case UNKNOWN: + return "TTransportException: Unknown transport exception"; + case NOT_OPEN: + return "TTransportException: Transport not open"; + case TIMED_OUT: + return "TTransportException: Timed out"; + case END_OF_FILE: + return "TTransportException: End of file"; + case INTERRUPTED: + return "TTransportException: Interrupted"; + case BAD_ARGS: + return "TTransportException: Invalid arguments"; + case CORRUPTED_DATA: + return "TTransportException: Corrupted Data"; + case INTERNAL_ERROR: + return "TTransportException: Internal error"; + default: + return "TTransportException: (Invalid exception type)"; + } + } + else { + return message_.c_str(); + } + } + } + } +} // apache::thrift::transport diff --git a/include/dsn/tool/nativerun.h 
b/include/dsn/tool/nativerun.h new file mode 100644 index 0000000000..62829713f5 --- /dev/null +++ b/include/dsn/tool/nativerun.h @@ -0,0 +1,47 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#pragma once + +#include + +namespace dsn { + namespace tools { + + class nativerun : public tool_app + { + public: + nativerun(const char* name) + : tool_app(name) + { + } + + void install(service_spec& s); + + virtual void run() override; + }; + + } +} // end namespace dsn::tools diff --git a/include/dsn/tool/providers.common.h b/include/dsn/tool/providers.common.h new file mode 100644 index 0000000000..d23e55c622 --- /dev/null +++ b/include/dsn/tool/providers.common.h @@ -0,0 +1,34 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#pragma once + +# include + +namespace dsn { + namespace tools { + extern void register_common_providers(); + } +} diff --git a/include/dsn/tool/simulator.h b/include/dsn/tool/simulator.h new file mode 100644 index 0000000000..aaace951bc --- /dev/null +++ b/include/dsn/tool/simulator.h @@ -0,0 +1,45 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#pragma once + +#include + +namespace dsn { namespace tools { + +class simulator : public tool_app +{ +public: + simulator(const char* name) + : tool_app(name) + { + } + + void install(service_spec& s); + + virtual void run() override; +}; + +}} // end namespace dsn::tools diff --git a/include/dsn/tool_api.h b/include/dsn/tool_api.h new file mode 100644 index 0000000000..6153ede18f --- /dev/null +++ b/include/dsn/tool_api.h @@ -0,0 +1,149 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +# pragma once + +// providers +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include + +namespace dsn { namespace tools { + +class tool_base +{ +public: + tool_base(const char* name); + +protected: + std::string _name; +}; + +class toollet : public tool_base +{ +public: + template static toollet* create(const char* name) + { + return new T(name); + } + +public: + toollet(const char* name); + + virtual void install(service_spec& spec) = 0; +}; + +class tool_app : public tool_base +{ +public: + template static tool_app* create(const char* name) + { + return new T(name); + } + +public: + tool_app(const char* name); + + virtual void install(service_spec& spec) = 0; + + // this routine will be invoked in the main thread as the tool driver (if necessary for the tool, e.g., model checking) + virtual void run() + { + start_all_service_apps(); + } + +public: + virtual void start_all_service_apps(); + virtual void stop_all_service_apps(); + + static const service_spec& get_service_spec(); +}; + +typedef task_queue* (*task_queue_factory)(task_worker_pool*, int, task_queue*); +typedef task_worker* (*task_worker_factory)(task_worker_pool*, task_queue*, int, task_worker*); +typedef admission_controller* (*admission_controller_factory)(task_queue*, const char*); +typedef lock_provider* (*lock_factory)(dsn::service::zlock *, lock_provider*); +typedef rwlock_provider* (*read_write_lock_factory)(dsn::service::zrwlock *, rwlock_provider*); +typedef semaphore_provider* (*semaphore_factory)(dsn::service::zsemaphore *, int, semaphore_provider*); +typedef network* (*network_factory)(rpc_engine*, network*); +typedef aio_provider* (*aio_factory)(disk_engine*, aio_provider*); +typedef env_provider* (*env_factory)(env_provider*); +typedef nfs_node* (*nfs_factory)(service_node*); +typedef message_parser* (*message_parser_factory)(int); + +typedef perf_counter* (*perf_counter_factory)(const 
char *, const char *, perf_counter_type); +typedef logging_provider* (*logging_factory)(const char*); +typedef toollet* (*toollet_factory)(const char*); +typedef tool_app* (*tool_app_factory)(const char*); + +namespace internal_use_only +{ + bool register_component_provider(const char* name, task_queue_factory f, int type); + bool register_component_provider(const char* name, task_worker_factory f, int type); + bool register_component_provider(const char* name, admission_controller_factory f, int type); + bool register_component_provider(const char* name, lock_factory f, int type); + bool register_component_provider(const char* name, read_write_lock_factory f, int type); + bool register_component_provider(const char* name, semaphore_factory f, int type); + bool register_component_provider(const char* name, network_factory f, int type); + bool register_component_provider(const char* name, aio_factory f, int type); + bool register_component_provider(const char* name, env_factory f, int type); + bool register_component_provider(const char* name, perf_counter_factory f, int type); + bool register_component_provider(const char* name, logging_factory f, int type); + bool register_component_provider(const char* name, nfs_factory f, int type); + bool register_component_provider(const char* name, message_parser_factory f, int type); + + bool register_toollet(const char* name, toollet_factory f, int type); + bool register_tool(const char* name, tool_app_factory f, int type); + toollet* get_toollet(const char* name, int type); +} + +extern join_point syste_init; +extern join_point syste_exit; // return (see SetUnhandledExceptionFilter), type, error code, context + +template bool register_component_provider(const char* name) { return internal_use_only::register_component_provider(name, T::template create, PROVIDER_TYPE_MAIN); } +template bool register_component_aspect(const char* name) { return internal_use_only::register_component_provider(name, T::template create, 
PROVIDER_TYPE_ASPECT); } +template bool register_message_header_parser(network_header_format fmt) { return internal_use_only::register_component_provider(fmt.to_string(), T::template create, PROVIDER_TYPE_MAIN); } // keyed by fmt.to_string(), registered as PROVIDER_TYPE_MAIN + +template bool register_toollet(const char* name) { return internal_use_only::register_toollet(name, toollet::template create, 0); } // registers toollet::create as the factory for `name` +template bool register_tool(const char* name) { return internal_use_only::register_tool(name, tool_app::template create, 0); } // registers tool_app::create as the factory for `name` +template T* get_toollet(const char* name) { return (T*)internal_use_only::get_toollet(name, 0); } // unchecked cast of the looked-up toollet to T* +configuration_ptr config(); + +// --------- inline implementation ----------------------------- + + +}} // end namespace dsn::tools + diff --git a/include/dsn/toollet/fault_injector.h b/include/dsn/toollet/fault_injector.h new file mode 100644 index 0000000000..4ce1248d1a --- /dev/null +++ b/include/dsn/toollet/fault_injector.h @@ -0,0 +1,42 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#pragma once + +#include + +namespace dsn { + namespace tools { + + class fault_injector : public toollet + { + public: + fault_injector(const char* name); + virtual void install(service_spec& spec); + }; + } +} + + diff --git a/include/dsn/toollet/profiler.h b/include/dsn/toollet/profiler.h new file mode 100644 index 0000000000..30b46e9a4b --- /dev/null +++ b/include/dsn/toollet/profiler.h @@ -0,0 +1,42 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#pragma once + +#include + +namespace dsn { + namespace tools { + + class profiler : public toollet + { + public: + profiler(const char* name); + virtual void install(service_spec& spec); + }; + } +} + + diff --git a/include/dsn/toollet/tracer.h b/include/dsn/toollet/tracer.h new file mode 100644 index 0000000000..243048dac8 --- /dev/null +++ b/include/dsn/toollet/tracer.h @@ -0,0 +1,42 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#pragma once + +#include + +namespace dsn { + namespace tools { + + class tracer : public toollet + { + public: + tracer(const char* name); + virtual void install(service_spec& spec); + }; + } +} + + diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt new file mode 100644 index 0000000000..7c0528a88b --- /dev/null +++ b/src/CMakeLists.txt @@ -0,0 +1,6 @@ +add_subdirectory(core) +add_subdirectory(dev) +add_subdirectory(tools) +add_subdirectory(cli) +add_subdirectory(dist) +add_subdirectory(apps) diff --git a/src/apps/CMakeLists.txt b/src/apps/CMakeLists.txt new file mode 100644 index 0000000000..d6659e52d3 --- /dev/null +++ b/src/apps/CMakeLists.txt @@ -0,0 +1,2 @@ +add_subdirectory(echo) +add_subdirectory(replication) diff --git a/src/apps/echo/CMakeLists.txt b/src/apps/echo/CMakeLists.txt new file mode 100644 index 0000000000..96a5ac435a --- /dev/null +++ b/src/apps/echo/CMakeLists.txt @@ -0,0 +1,3 @@ +#set(INPUT_LIBS dsn.failure_detector ${DSN_LIBS}) +set(BINPLACE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/config.ini") +dsn_add_executable(echo "${BINPLACE_FILES}") diff --git a/src/apps/echo/config.ini b/src/apps/echo/config.ini new file mode 100644 index 0000000000..9681c9de82 --- /dev/null +++ b/src/apps/echo/config.ini @@ -0,0 +1,78 @@ +[apps.server] +name = server +type = echo_server_app +arguments = + +ports = 8101,8103 +run = true +count = 1 + +[apps.client] +name = client +type = echo_client_app +arguments = localhost 8101 +count = 1 +message_size = 1024000 +concurrency = 1 +run = true +echo2 = true + +[core] + +;tool = simulator +tool = nativerun +;toollets = tracer, profiler +;fault_injector +pause_on_start = false + +[tools.simulator] +random_seed = 2756568580 +use_given_random_seed = true + +[network] +; how many network threads for network library (used by asio) +io_service_worker_count = 2 + +[network.8101] +; channel = network_header_format, network_provider_name, buffer_block_size +;RPC_CHANNEL_TCP = NET_HDR_DSN, dsn::tools::asio_network_provider, 
65536 + +;RPC_CHANNEL_TCP = NET_HDR_THRIFT, dsn::tools::asio_network_provider, 65536 + + +[task.default] +is_trace = true +is_profile = true +allow_inline = false +rpc_call_channel = RPC_CHANNEL_TCP +fast_execution_in_network_thread = false +rpc_message_header_format = dsn +rpc_timeout_milliseconds = 5000 + +[task.LPC_AIO_IMMEDIATE_CALLBACK] +is_trace = false +is_profile = false +allow_inline = false + +[task.LPC_RPC_TIMEOUT] +is_trace = false +is_profile = false + +; specification for each thread pool +[threadpool.default] + +[threadpool.THREAD_POOL_DEFAULT] +name = default +partitioned = false +worker_count = 1 +; max_input_queue_length = 1024 +worker_priority = THREAD_xPRIORITY_NORMAL + +; BoundedQueueAdmissionController MaxTaskQueueSize +; SingleRpcClassResponseTimeAdmissionController RpcRequestEventCode PercentileType(0-4) LatencyThreshold100ns(from task create to end in local process) +; counter percentile type (0-4): 999, 99, 95, 90, 50 +;admission_controller_factory_name = SingleRpcClassResponseTimeAdmissionController +;admission_controller_arguments = RPC_TEST 1 20000 + +;admission_controller_factory_name = BoundedQueueAdmissionController +;admission_controller_arguments = 100 diff --git a/src/apps/echo/echo_service.h b/src/apps/echo/echo_service.h new file mode 100644 index 0000000000..dd2080ece7 --- /dev/null +++ b/src/apps/echo/echo_service.h @@ -0,0 +1,234 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * 
The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# pragma once + +# include +# include +# include + +DEFINE_THREAD_POOL_CODE(THREAD_POOL_TEST) +DEFINE_TASK_CODE(LPC_ECHO_TIMER, ::dsn::TASK_PRIORITY_HIGH, THREAD_POOL_TEST) +DEFINE_TASK_CODE_RPC(RPC_ECHO, ::dsn::TASK_PRIORITY_HIGH, THREAD_POOL_TEST) +DEFINE_TASK_CODE_RPC(RPC_ECHO2, ::dsn::TASK_PRIORITY_HIGH, THREAD_POOL_TEST) + +using namespace dsn; +using namespace dsn::service; + +class echo_server : public serverlet, public service_app +{ +public: + echo_server(service_app_spec* s) + : service_app(s), serverlet("echo_server") + { + _empty_reply = system::config()->get_value("apps.server", "empty_reply", false); + } + + void on_echo(const std::string& req, __out_param std::string& resp) + { + if (!_empty_reply) + resp = req; + else + resp = ""; + } + + void on_echo2(const blob& req, rpc_replier& reply) + { + if (!_empty_reply) + reply(req); + else + { + blob empty; + reply(empty); + } + } + + virtual error_code start(int argc, char** argv) + { + register_rpc_handler(RPC_ECHO, "RPC_ECHO", &echo_server::on_echo); + register_async_rpc_handler(RPC_ECHO2, "RPC_ECHO2", &echo_server::on_echo2); + return ERR_SUCCESS; + } + + virtual void stop(bool cleanup = false) + { + unregister_rpc_handler(RPC_ECHO); + unregister_rpc_handler(RPC_ECHO2); + } + +private: + bool _empty_reply; +}; + +class echo_client : public serverlet, public service_app +{ +public: + 
echo_client(service_app_spec* s) + : service_app(s), serverlet("echo_client") + { + _message_size = system::config()->get_value("apps.client", "message_size", 1024); + _concurrency = system::config()->get_value("apps.client", "concurrency", 1); + _echo2 = system::config()->get_value("apps.client", "echo2", false); + + _seq = 0; + _last_report_ts_ms = now_ms(); + _recv_bytes_since_last = 0; + _live_echo_count = 0; + } + + virtual error_code start(int argc, char** argv) + { + if (argc < 3) + return ERR_INVALID_PARAMETERS; + + _server = end_point(argv[1], (uint16_t)atoi(argv[2])); + _timer = tasking::enqueue(LPC_ECHO_TIMER, this, &echo_client::on_echo_timer, 0, 1000); + return ERR_SUCCESS; + } + + virtual void stop(bool cleanup = false) + { + _timer->cancel(true); + } + + void send_one() // sends one echo request: RPC_ECHO with a string payload, or RPC_ECHO2 with a raw blob when _echo2 is set + { + char buf[120]; + sprintf(buf, "%u", ++_seq); + + if (!_echo2) + { + std::shared_ptr req(new std::string("hi, dsn ")); + *req = req->append(buf); + req->resize(_message_size); + rpc::call_typed(_server, RPC_ECHO, req, this, &echo_client::on_echo_reply, 0, 5000); + } + else + { + std::shared_ptr buffer((char*)::malloc(_message_size), ::free); // malloc'ed memory must be released with free, not the default delete + std::shared_ptr bb(new blob(buffer, _message_size)); + rpc::call_typed(_server, RPC_ECHO2, bb, this, &echo_client::on_echo_reply2, 0, 5000); + } + } + + void on_echo_timer() + { + for (int i = 0; i < _concurrency; i++) + { + { + zauto_lock l(_lock); + ++_live_echo_count; + } + send_one(); + } + } + + void on_echo_reply(error_code err, std::shared_ptr& req, std::shared_ptr& resp) + { + if (err != ERR_SUCCESS) + { + bool s = false; + std::cout << "echo err: " << err.to_string() << std::endl; + { + zauto_lock l(_lock); + if (1 == --_live_echo_count) + { + ++_live_echo_count; + s = true; + } + } + + if (s) send_one(); + } + else + { + { + zauto_lock l(_lock); + _recv_bytes_since_last += _message_size; + auto n = now_ms(); + if (n - _last_report_ts_ms >= 1000) + { + std::cout << "throughput = " + << static_cast(_recv_bytes_since_last) / 1024.0 / 1024.0
/ ((static_cast(n - _last_report_ts_ms)) / 1000.0) + << " MB/s" << std::endl; + _last_report_ts_ms = n; + _recv_bytes_since_last = 0; + } + } + + send_one(); + } + } + + void on_echo_reply2(error_code err, std::shared_ptr& req, std::shared_ptr& resp) + { + if (err != ERR_SUCCESS) + { + bool s = false; + std::cout << "echo err: " << err.to_string() << std::endl; + { + zauto_lock l(_lock); + if (1 == --_live_echo_count) + { + ++_live_echo_count; + s = true; + } + } + + if (s) send_one(); + } + else + { + { + zauto_lock l(_lock); + _recv_bytes_since_last += _message_size; + auto n = now_ms(); + if (n - _last_report_ts_ms >= 1000) + { + std::cout << "throughput = " + << static_cast(_recv_bytes_since_last) / 1024.0 / 1024.0 / ((static_cast(n - _last_report_ts_ms)) / 1000.0) + << " MB/s" << std::endl; + _last_report_ts_ms = n; + _recv_bytes_since_last = 0; + } + } + + send_one(); + } + } + +private: + zlock _lock; + uint64_t _recv_bytes_since_last; + uint64_t _last_report_ts_ms; + int32_t _live_echo_count; + + end_point _server; + int _seq; + int _message_size; + int _concurrency; + bool _echo2; + task_ptr _timer; +}; diff --git a/src/apps/echo/main.cpp b/src/apps/echo/main.cpp new file mode 100644 index 0000000000..e599749ced --- /dev/null +++ b/src/apps/echo/main.cpp @@ -0,0 +1,52 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or 
substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +# include "echo_service.h" +# include +# include +# include +# include +# include + +using namespace dsn::service; + +int main(int argc, char * argv[]) +{ + // register all possible services + dsn::service::system::register_service("echo_client_app"); + dsn::service::system::register_service("echo_server_app"); + + // register all possible tools and toollets + dsn::tools::register_tool("nativerun"); + dsn::tools::register_tool("simulator"); + dsn::tools::register_toollet("tracer"); + dsn::tools::register_toollet("profiler"); + dsn::tools::register_toollet("fault_injector"); + + // specify what services and tools will run in config file, then run + dsn::service::system::run("config.ini", true); + return 0; +} diff --git a/src/apps/replication/CMakeLists.txt b/src/apps/replication/CMakeLists.txt new file mode 100644 index 0000000000..6b22e65893 --- /dev/null +++ b/src/apps/replication/CMakeLists.txt @@ -0,0 +1,4 @@ +add_subdirectory(client_lib) +add_subdirectory(meta_server) +add_subdirectory(lib) +add_subdirectory(exe) diff --git a/src/apps/replication/client_lib/CMakeLists.txt b/src/apps/replication/client_lib/CMakeLists.txt new file mode 100644 index 0000000000..9dd518882a --- /dev/null +++ b/src/apps/replication/client_lib/CMakeLists.txt @@ -0,0 +1 @@ +dsn_add_library(dsn.replication.clientlib) diff --git a/src/apps/replication/client_lib/replication_app_client_base.cpp b/src/apps/replication/client_lib/replication_app_client_base.cpp 
new file mode 100644 index 0000000000..2e473461fe --- /dev/null +++ b/src/apps/replication/client_lib/replication_app_client_base.cpp @@ -0,0 +1,465 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include "replication_common.h" +#include "rpc_replicated.h" + +namespace dsn { namespace replication { + +using namespace ::dsn::service; + +void replication_app_client_base::load_meta_servers( + configuration_ptr& cf, + __out_param std::vector& servers + ) +{ + // read meta_servers from machine list file + servers.clear(); + + std::vector server_ss; + cf->get_all_keys("replication.meta_servers", server_ss); + for (auto& s : server_ss) + { + // name:port + auto pos1 = s.find_first_of(':'); + if (pos1 != std::string::npos) + { + end_point ep(s.substr(0, pos1).c_str(), atoi(s.substr(pos1 + 1).c_str())); + servers.push_back(ep); + } + } +} + +replication_app_client_base::replication_app_client_base( + const std::vector& meta_servers, + const char* app_name + ) +{ + _app_name = std::string(app_name); + _meta_servers = meta_servers; + + _app_id = -1; + _last_contact_point = end_point::INVALID; +} + +replication_app_client_base::~replication_app_client_base() +{ + clear_all_pending_tasks(); +} + +void replication_app_client_base::clear_all_pending_tasks() +{ + message_ptr nil(nullptr); + + service::zauto_lock l(_requests_lock); + for (auto& pc : _pending_requests) + { + if (pc.second->query_config_task != nullptr) + pc.second->query_config_task->cancel(true); + + for (auto& rc : pc.second->requests) + { + end_request(rc, ERR_TIMEOUT, nil); + delete rc; + } + delete pc.second; + } + _pending_requests.clear(); +} + + +void replication_app_client_base::on_user_request_timeout(request_context* rc) +{ + message_ptr nil(nullptr); + rc->callback_task->enqueue(ERR_TIMEOUT, nil); +} + +DEFINE_TASK_CODE(LPC_REPLICATION_CLIENT_REQUEST_TIMEOUT, TASK_PRIORITY_COMMON, THREAD_POOL_DEFAULT) +DEFINE_TASK_CODE(LPC_REPLICATION_DELAY_QUERY_CONFIG, TASK_PRIORITY_COMMON, THREAD_POOL_DEFAULT) + +replication_app_client_base::request_context* replication_app_client_base::create_write_context( + int partition_index, + task_code code, + rpc_response_task_ptr callback, + int reply_hash + ) 
+{ + auto rc = new request_context; + rc->callback_task = callback; + rc->is_read = false; + rc->partition_index = partition_index; + rc->write_header.gpid.app_id = _app_id; + rc->write_header.gpid.pidx = partition_index; + rc->write_header.code = code; + rc->timeout_timer = nullptr; + + if (rc->write_header.gpid.app_id == -1) // write path must test write_header (read_header is never set here); -1 means the app id is not resolved yet + { + rc->header_pos = callback->get_request()->writer().write_placeholder(); // reserve room; call() marshals the header at this position once the app id is known + } + else + { + rc->header_pos = 0xffff; // 0xffff tells call() that the header has already been marshalled + marshall(callback->get_request()->writer(), rc->write_header); + } + + return rc; +} + +replication_app_client_base::request_context* replication_app_client_base::create_read_context( + int partition_index, + task_code code, + rpc_response_task_ptr callback, + read_semantic_t read_semantic, + decree snapshot_decree, // only used when ReadSnapshot + int reply_hash + ) +{ + auto rc = new request_context; + rc->callback_task = callback; + rc->is_read = true; + rc->partition_index = partition_index; + rc->read_header.gpid.app_id = _app_id; + rc->read_header.gpid.pidx = partition_index; + rc->read_header.code = code; + rc->read_header.semantic = read_semantic; + rc->read_header.version_decree = snapshot_decree; + rc->timeout_timer = nullptr; + + if (rc->read_header.gpid.app_id == -1) + { + rc->header_pos = callback->get_request()->writer().write_placeholder(); + } + else + { + rc->header_pos = 0xffff; + marshall(callback->get_request()->writer(), rc->read_header); + } + + return rc; +} + +void replication_app_client_base::end_request(request_context* request, error_code err, message_ptr& resp) +{ + if (request->timeout_timer == nullptr || request->timeout_timer->cancel(true)) + { + request->callback_task->enqueue(err, resp); + } +} + +void replication_app_client_base::call(request_context* request, bool no_delay) +{ + auto& msg = request->callback_task->get_request(); + auto nts = ::dsn::service::env::now_us(); + if (nts + 100 >= msg->header().client.timeout_ts_us) // < 100us + { + message_ptr nil(nullptr); +
end_request(request, ERR_TIMEOUT, nil); + delete request; + return; + } + + end_point addr; + int app_id; + + error_code err = get_address( + request->partition_index, + !request->is_read, + addr, + app_id, + request->read_header.semantic + ); + + // target node in cache + if (err == ERR_SUCCESS) + { + dbg_dassert(addr != end_point::INVALID, ""); + + if (request->header_pos != 0xffff) + { + if (request->is_read) + { + request->read_header.gpid.app_id = app_id; + marshall(msg->writer(), request->read_header, request->header_pos); + msg->header().client.hash = gpid_to_hash(request->read_header.gpid); + } + else + { + request->write_header.gpid.app_id = app_id; + marshall(msg->writer(), request->write_header, request->header_pos); + msg->header().client.hash = gpid_to_hash(request->write_header.gpid); + } + request->header_pos = 0xffff; + } + + rpc::call( + addr, + msg, + this, + std::bind( + &replication_app_client_base::replica_rw_reply, + this, + std::placeholders::_1, + std::placeholders::_2, + std::placeholders::_3, + request + ) + ); + } + + // target node not known + else if (!no_delay) + { + // delay 1 second for further config query + tasking::enqueue(LPC_REPLICATION_DELAY_QUERY_CONFIG, this, + std::bind(&replication_app_client_base::call, this, request, true), + 0, + 1000 + ); + } + + else + { + zauto_lock l(_requests_lock); + + // init timeout timer if necessary + if (request->timeout_timer == nullptr) + { + request->timeout_timer = tasking::enqueue( + LPC_REPLICATION_CLIENT_REQUEST_TIMEOUT, + this, + std::bind(&replication_app_client_base::on_user_request_timeout, this, request), + 0, + static_cast((msg->header().client.timeout_ts_us - nts) / 1000) + ); + } + + // put into pending queue of querying target partition + auto it = _pending_requests.find(request->partition_index); + if (it == _pending_requests.end()) + { + auto pc = new partition_context; + pc->query_config_task = nullptr; + it = 
_pending_requests.insert(pending_requests::value_type(request->partition_index, pc)).first; + } + + it->second->requests.push_back(request); + + // init configuration query task if necessary + if (it->second->query_config_task == nullptr) + { + message_ptr msg = message::create_request(RPC_CM_CALL); + + meta_request_header hdr; + hdr.rpc_tag = RPC_CM_QUERY_PARTITION_CONFIG_BY_INDEX; + marshall(msg->writer(), hdr); + + configuration_query_by_index_request req; + req.app_name = _app_name; + req.partition_indices.push_back(request->partition_index); + marshall(msg->writer(), req); + + it->second->query_config_task = rpc::call_replicated( + _last_contact_point, + _meta_servers, + msg, + + this, + std::bind(&replication_app_client_base::query_partition_configuration_reply, + this, + std::placeholders::_1, + std::placeholders::_2, + std::placeholders::_3, + request->partition_index + ) + ); + } + } +} + +void replication_app_client_base::replica_rw_reply( + error_code err, + message_ptr& request, + message_ptr& response, + request_context* rc + ) +{ + if (err != ERR_SUCCESS) + { + goto Retry; + } + + int err2; + response->reader().read(err2); + + if (err2 != ERR_SUCCESS && err2 != ERR_HANDLER_NOT_FOUND) + { + goto Retry; + } + else + { + end_request(rc, err, response); + delete rc; + } + return; + +Retry: + // clear partition configuration as it could be wrong + { + zauto_write_lock l(_config_lock); + _config_cache.erase(rc->is_read ? 
rc->read_header.gpid.pidx : rc->write_header.gpid.pidx); + } + + // then retry + call(rc, false); +} + +error_code replication_app_client_base::get_address(int pidx, bool is_write, __out_param end_point& addr, __out_param int& app_id, read_semantic_t semantic) +{ + error_code err; + partition_configuration config; + + { + zauto_read_lock l(_config_lock); + auto it = _config_cache.find(pidx); + if (it != _config_cache.end()) + { + err = ERR_SUCCESS; + config = it->second; + } + else + { + err = ERR_IO_PENDING; + } + } + + if (err == ERR_SUCCESS) + { + app_id = _app_id; + if (is_write) + { + addr = config.primary; + } + else + { + addr = get_read_address(semantic, config); + } + + if (dsn::end_point::INVALID == addr) + { + err = ERR_IO_PENDING; + } + } + return err; +} + +void replication_app_client_base::query_partition_configuration_reply(error_code err, message_ptr& request, message_ptr& response, int pidx) +{ + if (!err) + { + configuration_query_by_index_response resp; + unmarshall(response->reader(), resp); + if (resp.err == ERR_SUCCESS) + { + zauto_write_lock l(_config_lock); + _last_contact_point = response->header().from_address; + + if (resp.partitions.size() > 0) + { + if (_app_id != -1 && _app_id != resp.partitions[0].gpid.app_id) + { + dassert(false, "app id is changed (mostly the app was removed and created with the same name), local Vs remote: %u vs %u ", + _app_id, resp.partitions[0].gpid.app_id); + } + + _app_id = resp.partitions[0].gpid.app_id; + } + + for (auto it = resp.partitions.begin(); it != resp.partitions.end(); it++) + { + partition_configuration& new_config = *it; + auto it2 = _config_cache.find(new_config.gpid.pidx); + if (it2 == _config_cache.end()) + { + _config_cache[new_config.gpid.pidx] = new_config; + } + else if (it2->second.ballot < new_config.ballot) + { + it2->second = new_config; + } + } + } + } + + // send pending client msgs + partition_context* pc = nullptr; + { + zauto_lock l(_requests_lock); + auto it = 
_pending_requests.find(pidx); + if (it != _pending_requests.end()) + { + pc = it->second; + _pending_requests.erase(pidx); + } + } + + if (pc != nullptr) + { + for (auto& req : pc->requests) + { + call(req, false); + } + pc->requests.clear(); + delete pc; + } +} + +end_point replication_app_client_base::get_read_address(read_semantic_t semantic, const partition_configuration& config) +{ + if (semantic == read_semantic_t::ReadLastUpdate) + return config.primary; + + // readsnapshot or readoutdated, using random + else + { + bool has_primary = false; + int N = static_cast(config.secondaries.size()); + if (config.primary != dsn::end_point::INVALID) + { + N++; + has_primary = true; + } + + if (0 == N) return config.primary; + + int r = random32(0, 1000) % N; + if (has_primary && r == N - 1) + return config.primary; + else + return config.secondaries[r]; + } +} + +}} // end namespace diff --git a/src/apps/replication/client_lib/replication_common.cpp b/src/apps/replication/client_lib/replication_common.cpp new file mode 100644 index 0000000000..e13d75e62c --- /dev/null +++ b/src/apps/replication/client_lib/replication_common.cpp @@ -0,0 +1,196 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "replication_common.h"
+#include <algorithm>
+
+namespace dsn { namespace replication {
+
+// Sets the built-in defaults; initialize() later overrides them with whatever
+// the [replication] configuration section provides.
+replication_options::replication_options()
+{
+    prepare_timeout_ms_for_secondaries = 1000;
+    prepare_timeout_ms_for_potential_secondaries = 3000;
+    staleness_for_commit = 10;
+    staleness_for_start_prepare_for_potential_secondary = 110;
+    mutation_2pc_min_replica_count = 1;
+    preapre_list_max_size_mb = 250;
+    group_check_internal_ms = 100000;
+    group_check_disabled = false;
+    gc_interval_ms = 30 * 1000; // 30000 milliseconds
+    gc_disabled = false;
+    gc_memory_replica_interval_ms = 5 * 60 * 1000; // 5 minutes
+    // the field is in seconds: 48 hrs is 48 * 3600, not 48 * 3600 * 1000
+    gc_disk_error_replica_interval_seconds = 48 * 3600; // 48 hrs
+    log_batch_write = true;
+    log_max_concurrent_writes = 4;
+    fd_disabled = false;
+    //_options.meta_servers = ...;
+    fd_check_interval_seconds = 5;
+    fd_beacon_interval_seconds = 3;
+    fd_lease_seconds = 10;
+    fd_grace_seconds = 15;
+    working_dir = ".";
+
+    log_buffer_size_mb = 1;
+    log_pending_max_ms = 100;
+    log_file_size_mb = 32;
+
+    config_sync_interval_ms = 30000;
+    config_sync_disabled = false;
+}
+
+replication_options::~replication_options()
+{
+}
+
+// Rebuilds meta_servers from the keys of the "replication.meta_servers"
+// section. Each key is expected to look like "name:port"; keys without a ':'
+// are ignored.
+void replication_options::read_meta_servers(configuration_ptr config)
+{
+    // read meta_servers from machine list file
+    meta_servers.clear();
+
+    std::vector<std::string> servers;
+    config->get_all_keys("replication.meta_servers", servers);
+    for (auto& s : servers)
+    {
+        // name:port
+        auto pos1 = s.find_first_of(':');
+        if (pos1 != std::string::npos)
+        {
+            end_point ep(s.substr(0, pos1).c_str(), atoi(s.substr(pos1 + 1).c_str()));
+            meta_servers.push_back(ep);
+        }
+    }
+}
+
+// Loads every option from the "replication" configuration section, using the
+// current member value as the fallback, then reads the meta server list and
+// validates the result with sanity_check().
+void replication_options::initialize(configuration_ptr config)
+{
+    prepare_timeout_ms_for_secondaries =
+        config->get_value("replication", "prepare_timeout_ms_for_secondaries", prepare_timeout_ms_for_secondaries);
+    prepare_timeout_ms_for_potential_secondaries =
+        config->get_value("replication", "prepare_timeout_ms_for_potential_secondaries", prepare_timeout_ms_for_potential_secondaries);
+
+    staleness_for_commit =
+        config->get_value("replication", "staleness_for_commit", staleness_for_commit);
+    staleness_for_start_prepare_for_potential_secondary =
+        config->get_value("replication", "staleness_for_start_prepare_for_potential_secondary", staleness_for_start_prepare_for_potential_secondary);
+    mutation_2pc_min_replica_count =
+        config->get_value("replication", "mutation_2pc_min_replica_count", mutation_2pc_min_replica_count);
+    // the misspelt key "preapre_..." is what existing configuration files use; keep it
+    preapre_list_max_size_mb =
+        config->get_value("replication", "preapre_list_max_size_mb", preapre_list_max_size_mb);
+    group_check_internal_ms =
+        config->get_value("replication", "group_check_internal_ms", group_check_internal_ms);
+    group_check_disabled =
+        config->get_value("replication", "group_check_disabled", group_check_disabled);
+    gc_interval_ms =
+        config->get_value("replication", "gc_interval_ms", gc_interval_ms);
+    gc_memory_replica_interval_ms =
+        config->get_value("replication", "gc_memory_replica_interval_ms", gc_memory_replica_interval_ms);
+    gc_disk_error_replica_interval_seconds =
+        config->get_value("replication", "gc_disk_error_replica_interval_seconds", gc_disk_error_replica_interval_seconds);
+    gc_disabled =
+        config->get_value("replication", "gc_disabled", gc_disabled);
+
+    fd_disabled =
+        config->get_value("replication", "fd_disabled", fd_disabled);
+    //_options.meta_servers = ...;
+    fd_check_interval_seconds =
+        config->get_value("replication", "fd_check_interval_seconds", fd_check_interval_seconds);
+    fd_beacon_interval_seconds =
+        config->get_value("replication", "fd_beacon_interval_seconds", fd_beacon_interval_seconds);
+    fd_lease_seconds =
+        config->get_value("replication", "fd_lease_seconds", fd_lease_seconds);
+    fd_grace_seconds =
+        config->get_value("replication", "fd_grace_seconds", fd_grace_seconds);
+    working_dir = config->get_string_value("replication", "working_dir", working_dir.c_str());
+
+    log_file_size_mb =
+        config->get_value("replication", "log_file_size_mb", log_file_size_mb);
+    log_buffer_size_mb =
+        config->get_value("replication", "log_buffer_size_mb", log_buffer_size_mb);
+    log_pending_max_ms =
+        config->get_value("replication", "log_pending_max_ms", log_pending_max_ms);
+    log_batch_write =
+        config->get_value("replication", "log_batch_write", log_batch_write);
+    log_max_concurrent_writes =
+        config->get_value("replication", "log_max_concurrent_writes", log_max_concurrent_writes);
+
+    config_sync_disabled =
+        config->get_value("replication", "config_sync_disabled", config_sync_disabled);
+    //_options.meta_servers = ...;
+    config_sync_interval_ms =
+        config->get_value("replication", "config_sync_interval_ms", config_sync_interval_ms);
+
+    read_meta_servers(config);
+
+    sanity_check();
+}
+
+// Asserts the one cross-option invariant checked here: the staleness bound for
+// starting prepare on a potential secondary is not below the commit bound.
+void replication_options::sanity_check()
+{
+    dassert (staleness_for_start_prepare_for_potential_secondary >= staleness_for_commit,
+        "staleness_for_start_prepare_for_potential_secondary (%d) must be >= staleness_for_commit (%d)",
+        staleness_for_start_prepare_for_potential_secondary, staleness_for_commit);
+}
+
+// Removes the first occurrence of `node` from `nodeList`.
+// Returns true when an entry was removed, false when `node` was not present.
+/*static*/ bool replica_helper::remove_node(const end_point& node, __inout_param std::vector<end_point>& nodeList)
+{
+    auto it = std::find(nodeList.begin(), nodeList.end(), node);
+    if (it == nodeList.end())
+    {
+        return false;
+    }
+
+    nodeList.erase(it);
+    return true;
+}
+
+// Derives the per-replica view of `partitionConfig` for `node`.
+// gpid, primary and ballot are always copied. status becomes PS_PRIMARY or
+// PS_SECONDARY when `node` holds that role, otherwise PS_INACTIVE.
+// Returns true when `node` is the primary, a secondary or a drop-out of the
+// partition, false when the partition does not mention it at all.
+/*static*/ bool replica_helper::get_replica_config(const partition_configuration& partitionConfig, const end_point& node, __out_param replica_configuration& replicaConfig)
+{
+    replicaConfig.gpid = partitionConfig.gpid;
+    replicaConfig.primary = partitionConfig.primary;
+    replicaConfig.ballot = partitionConfig.ballot;
+
+    if (node == partitionConfig.primary)
+    {
+        replicaConfig.status = PS_PRIMARY;
+        return true;
+    }
+    else if (std::find(partitionConfig.secondaries.begin(), partitionConfig.secondaries.end(), node) != partitionConfig.secondaries.end())
+    {
+        replicaConfig.status = PS_SECONDARY;
+        return true;
+    }
+    else if (std::find(partitionConfig.drop_outs.begin(), partitionConfig.drop_outs.end(), node) != partitionConfig.drop_outs.end())
+    {
+        replicaConfig.status = PS_INACTIVE;
+        return true;
+    }
+    else
+    {
+        replicaConfig.status = PS_INACTIVE;
+        return false;
+    }
+}
+
+}} // end namespace
diff --git a/src/apps/replication/client_lib/replication_common.h b/src/apps/replication/client_lib/replication_common.h
new file mode 100644
index 0000000000..b01dbc83b1
--- /dev/null
+++ b/src/apps/replication/client_lib/replication_common.h
@@ -0,0 +1,101 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015 Microsoft Corporation
+ *
+ * -=- Robust Distributed System Nucleus (rDSN) -=-
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+# pragma once
+
+// NOTE(review): the targets of the next two includes are missing in this copy
+// of the patch; restore them from the upstream file.
+# include
+# include
+# include "replication_ds.h"
+
+// Header-scope using-directive: every includer sees dsn::service names.
+// Kept because code including this header may rely on it.
+using namespace ::dsn::service;
+
+namespace dsn { namespace replication {
+
+// Folds a global partition id into a single int by XOR-ing its two fields.
+inline int gpid_to_hash(global_partition_id gpid)
+{
+    return static_cast<int>(gpid.app_id ^ gpid.pidx);
+}
+
+// NOTE(review): the template arguments of the five typedefs below are missing
+// in this copy of the patch; restore them from the upstream file.
+typedef std::set NodeSet;
+typedef std::map NodeIdMap;
+typedef std::map NodeDecreeMap;
+typedef std::map NodeStatusMap;
+typedef std::map node_tasks;
+
+// Tunables of the replication layer. The constructor fills in defaults and
+// initialize() overrides them from the "replication" configuration section.
+class replication_options
+{
+public:
+    std::string working_dir;
+    std::vector<end_point> meta_servers; // filled by read_meta_servers()
+
+    // prepare settings
+    int32_t prepare_timeout_ms_for_secondaries;
+    int32_t prepare_timeout_ms_for_potential_secondaries;
+    int32_t preapre_list_max_size_mb; // sic: the spelling is part of the public name and config key
+
+    // staleness bounds; sanity_check() requires start_prepare >= commit
+    int32_t staleness_for_commit;
+    int32_t staleness_for_start_prepare_for_potential_secondary;
+    int32_t mutation_2pc_min_replica_count;
+
+    // group check
+    bool    group_check_disabled;
+    int32_t group_check_internal_ms;
+
+    // gc_* settings (note the mixed units: two in ms, one in seconds)
+    int32_t gc_interval_ms;
+    bool    gc_disabled;
+    int32_t gc_memory_replica_interval_ms;
+    int32_t gc_disk_error_replica_interval_seconds;
+
+    // fd_* settings (presumably the failure detector; confirm with its users)
+    bool    fd_disabled;
+    int32_t fd_check_interval_seconds;
+    int32_t fd_beacon_interval_seconds;
+    int32_t fd_lease_seconds;
+    int32_t fd_grace_seconds;
+
+    // log_* settings
+    int32_t log_file_size_mb;
+    int32_t log_buffer_size_mb;
+    int32_t log_pending_max_ms;
+    bool    log_batch_write;
+    int32_t log_max_concurrent_writes;
+
+    // config_sync_* settings
+    int32_t config_sync_interval_ms;
+    bool    config_sync_disabled;
+
+public:
+    replication_options();
+    // Loads all options from `config`, reads the meta server list and validates.
+    void initialize(configuration_ptr config);
+    ~replication_options();
+
+private:
+    void read_meta_servers(configuration_ptr config);
+    void sanity_check();
+};
+
+// Stateless helpers for working with partition configurations.
+class replica_helper
+{
+public:
+    // Removes `node` from `nodeList`; returns whether it was present.
+    static bool remove_node(const end_point& node, __inout_param std::vector<end_point>& nodeList);
+    // Fills `replicaConfig` with the view of `partitionConfig` for `node`;
+    // returns whether `node` is listed in the partition.
+    static bool get_replica_config(const partition_configuration& partitionConfig, const end_point& node, __out_param replica_configuration& replicaConfig);
+};
+
+}} // namespace
diff --git a/src/apps/replication/client_lib/replication_ds.h b/src/apps/replication/client_lib/replication_ds.h
new file mode 100644
index 0000000000..dfbc183de9
--- /dev/null
+++ b/src/apps/replication/client_lib/replication_ds.h
@@ -0,0 +1,58 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015 Microsoft Corporation
+ *
+ * -=- Robust Distributed System Nucleus (rDSN) -=-
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#pragma once
+
+# include
+# include
+
+namespace dsn {
+    namespace replication {
+        // Enum registrations via the project's ENUM_BEGIN / ENUM_REG / ENUM_END macros (defined outside this file). The second ENUM_BEGIN argument is presumably the invalid/default value - confirm against the macro definition.
+        ENUM_BEGIN(partition_status, PS_INVALID)
+            ENUM_REG(PS_INACTIVE)
+            ENUM_REG(PS_ERROR)
+            ENUM_REG(PS_PRIMARY)
+            ENUM_REG(PS_SECONDARY)
+            ENUM_REG(PS_POTENTIAL_SECONDARY)
+        ENUM_END(partition_status)
+        // learner_status: registered names for the states of a learner.
+        ENUM_BEGIN(learner_status, Learning_INVALID)
+            ENUM_REG(LearningWithoutPrepare)
+            ENUM_REG(LearningWithPrepare)
+            ENUM_REG(LearningSucceeded)
+            ENUM_REG(LearningFailed)
+        ENUM_END(learner_status)
+        // config_type: registered names for the kinds of configuration change.
+        ENUM_BEGIN(config_type, CT_NONE)
+            ENUM_REG(CT_ASSIGN_PRIMARY)
+            ENUM_REG(CT_ADD_SECONDARY)
+            ENUM_REG(CT_DOWNGRADE_TO_SECONDARY)
+            ENUM_REG(CT_DOWNGRADE_TO_INACTIVE)
+            ENUM_REG(CT_REMOVE)
+            ENUM_REG(CT_UPGRADE_TO_SECONDARY)
+        ENUM_END(config_type)
+    }
+} // end namespace dsn::replication
diff --git a/src/apps/replication/client_lib/replication_types.cpp b/src/apps/replication/client_lib/replication_types.cpp
new file mode 100644
index 0000000000..3f5c570526
--- /dev/null
+++ b/src/apps/replication/client_lib/replication_types.cpp
@@ -0,0 +1,665 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015 Microsoft Corporation
+ *
+ * -=- Robust Distributed System Nucleus (rDSN) -=-
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+/**
+ * Autogenerated by Thrift Compiler (@PACKAGE_VERSION@)
+ *
+ * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
+ *  @generated
+ */
+#include
+
+namespace dsn { namespace replication {
+// Every struct below gets the same four pieces: an empty destructor, a namespace-scope swap, and a copy constructor / copy assignment that copy the listed members one by one.
+// global_partition_id (app_id, pidx); additionally ordered by operator<.
+global_partition_id::~global_partition_id() throw() {
+}
+
+
+void swap(global_partition_id &a, global_partition_id &b) {
+  using ::std::swap;
+  swap(a.app_id, b.app_id);
+  swap(a.pidx, b.pidx);
+}
+
+global_partition_id::global_partition_id(const global_partition_id& other0) {
+  app_id = other0.app_id;
+  pidx = other0.pidx;
+}
+global_partition_id& global_partition_id::operator=(const global_partition_id& other1) {
+  app_id = other1.app_id;
+  pidx = other1.pidx;
+  return *this;
+}
+// Lexicographic order: app_id first, then pidx.
+bool global_partition_id::operator <(const global_partition_id& other1) const
+{
+  return app_id < other1.app_id
+    || (app_id == other1.app_id && pidx < other1.pidx)
+    ;
+}
+// mutation_header
+mutation_header::~mutation_header() throw() {
+}
+
+
+void swap(mutation_header &a, mutation_header &b) {
+  using ::std::swap;
+  swap(a.gpid, b.gpid);
+  swap(a.ballot, b.ballot);
+  swap(a.decree, b.decree);
+  swap(a.log_offset, b.log_offset);
+  swap(a.last_committed_decree, b.last_committed_decree);
+}
+
+mutation_header::mutation_header(const mutation_header& other2) {
+  gpid = other2.gpid;
+  ballot = other2.ballot;
+  decree = other2.decree;
+  log_offset = other2.log_offset;
+  last_committed_decree = other2.last_committed_decree;
+}
+mutation_header& mutation_header::operator=(const mutation_header& other3) {
+  gpid = other3.gpid;
+  ballot = other3.ballot;
+  decree = other3.decree;
+  log_offset = other3.log_offset;
+  last_committed_decree = other3.last_committed_decree;
+  return *this;
+}
+// mutation_data
+mutation_data::~mutation_data() throw() {
+}
+
+
+void swap(mutation_data &a, mutation_data &b) {
+  using ::std::swap;
+  swap(a.header, b.header);
+  swap(a.updates, b.updates);
+}
+
+mutation_data::mutation_data(const mutation_data& other4) {
+  header = other4.header;
+  updates = other4.updates;
+}
+mutation_data& mutation_data::operator=(const mutation_data& other5) {
+  header = other5.header;
+  updates = other5.updates;
+  return *this;
+}
+// partition_configuration
+partition_configuration::~partition_configuration() throw() {
+}
+
+
+void swap(partition_configuration &a, partition_configuration &b) {
+  using ::std::swap;
+  swap(a.app_type, b.app_type);
+  swap(a.gpid, b.gpid);
+  swap(a.ballot, b.ballot);
+  swap(a.max_replica_count, b.max_replica_count);
+  swap(a.primary, b.primary);
+  swap(a.secondaries, b.secondaries);
+  swap(a.drop_outs, b.drop_outs);
+  swap(a.last_committed_decree, b.last_committed_decree);
+}
+
+partition_configuration::partition_configuration(const partition_configuration& other6) {
+  app_type = other6.app_type;
+  gpid = other6.gpid;
+  ballot = other6.ballot;
+  max_replica_count = other6.max_replica_count;
+  primary = other6.primary;
+  secondaries = other6.secondaries;
+  drop_outs = other6.drop_outs;
+  last_committed_decree = other6.last_committed_decree;
+}
+partition_configuration& partition_configuration::operator=(const partition_configuration& other7) {
+  app_type = other7.app_type;
+  gpid = other7.gpid;
+  ballot = other7.ballot;
+  max_replica_count = other7.max_replica_count;
+  primary = other7.primary;
+  secondaries = other7.secondaries;
+  drop_outs = other7.drop_outs;
+  last_committed_decree = other7.last_committed_decree;
+  return *this;
+}
+// replica_configuration
+replica_configuration::~replica_configuration() throw() {
+}
+
+
+void swap(replica_configuration &a, replica_configuration &b) {
+  using ::std::swap;
+  swap(a.gpid, b.gpid);
+  swap(a.ballot, b.ballot);
+  swap(a.primary, b.primary);
+  swap(a.status, b.status);
+}
+
+replica_configuration::replica_configuration(const replica_configuration& other8) {
+  gpid = other8.gpid;
+  ballot = other8.ballot;
+  primary = other8.primary;
+  status = other8.status;
+}
+replica_configuration& replica_configuration::operator=(const replica_configuration& other9) {
+  gpid = other9.gpid;
+  ballot = other9.ballot;
+  primary = other9.primary;
+  status = other9.status;
+  return *this;
+}
+// prepare_msg
+prepare_msg::~prepare_msg() throw() {
+}
+
+
+void swap(prepare_msg &a, prepare_msg &b) {
+  using ::std::swap;
+  swap(a.config, b.config);
+  swap(a.mu, b.mu);
+}
+
+prepare_msg::prepare_msg(const prepare_msg& other10) {
+  config = other10.config;
+  mu = other10.mu;
+}
+prepare_msg& prepare_msg::operator=(const prepare_msg& other11) {
+  config = other11.config;
+  mu = other11.mu;
+  return *this;
+}
+// read_request_header
+read_request_header::~read_request_header() throw() {
+}
+
+
+void swap(read_request_header &a, read_request_header &b) {
+  using ::std::swap;
+  swap(a.gpid, b.gpid);
+  swap(a.code, b.code);
+  swap(a.semantic, b.semantic);
+  swap(a.version_decree, b.version_decree);
+}
+
+read_request_header::read_request_header(const read_request_header& other12) {
+  gpid = other12.gpid;
+  code = other12.code;
+  semantic = other12.semantic;
+  version_decree = other12.version_decree;
+}
+read_request_header& read_request_header::operator=(const read_request_header& other13) {
+  gpid = other13.gpid;
+  code = other13.code;
+  semantic = other13.semantic;
+  version_decree = other13.version_decree;
+  return *this;
+}
+// write_request_header
+write_request_header::~write_request_header() throw() {
+}
+
+
+void swap(write_request_header &a, write_request_header &b) {
+  using ::std::swap;
+  swap(a.gpid, b.gpid);
+  swap(a.code, b.code);
+}
+
+write_request_header::write_request_header(const write_request_header& other14) {
+  gpid = other14.gpid;
+  code = other14.code;
+}
+write_request_header& write_request_header::operator=(const write_request_header& other15) {
+  gpid = other15.gpid;
+  code = other15.code;
+  return *this;
+}
+// rw_response_header
+rw_response_header::~rw_response_header() throw() {
+}
+
+
+void swap(rw_response_header &a, rw_response_header &b) {
+  using ::std::swap;
+  swap(a.err, b.err);
+}
+
+rw_response_header::rw_response_header(const rw_response_header& other16) {
+  err = other16.err;
+}
+rw_response_header& rw_response_header::operator=(const rw_response_header& other17) {
+  err = other17.err;
+  return *this;
+}
+// prepare_ack
+prepare_ack::~prepare_ack() throw() {
+}
+
+
+void swap(prepare_ack &a, prepare_ack &b) {
+  using ::std::swap;
+  swap(a.gpid, b.gpid);
+  swap(a.err, b.err);
+  swap(a.ballot, b.ballot);
+  swap(a.decree, b.decree);
+  swap(a.last_committed_decree_in_app, b.last_committed_decree_in_app);
+  swap(a.last_committed_decree_in_prepare_list, b.last_committed_decree_in_prepare_list);
+}
+
+prepare_ack::prepare_ack(const prepare_ack& other18) {
+  gpid = other18.gpid;
+  err = other18.err;
+  ballot = other18.ballot;
+  decree = other18.decree;
+  last_committed_decree_in_app = other18.last_committed_decree_in_app;
+  last_committed_decree_in_prepare_list = other18.last_committed_decree_in_prepare_list;
+}
+prepare_ack& prepare_ack::operator=(const prepare_ack& other19) {
+  gpid = other19.gpid;
+  err = other19.err;
+  ballot = other19.ballot;
+  decree = other19.decree;
+  last_committed_decree_in_app = other19.last_committed_decree_in_app;
+  last_committed_decree_in_prepare_list = other19.last_committed_decree_in_prepare_list;
+  return *this;
+}
+// learn_state
+learn_state::~learn_state() throw() {
+}
+
+
+void swap(learn_state &a, learn_state &b) {
+  using ::std::swap;
+  swap(a.meta, b.meta);
+  swap(a.files, b.files);
+}
+
+learn_state::learn_state(const learn_state& other20) {
+  meta = other20.meta;
+  files = other20.files;
+}
+learn_state& learn_state::operator=(const learn_state& other21) {
+  meta = other21.meta;
+  files = other21.files;
+  return *this;
+}
+// learn_request
+learn_request::~learn_request() throw() {
+}
+
+
+void swap(learn_request &a, learn_request &b) {
+  using ::std::swap;
+  swap(a.gpid, b.gpid);
+  swap(a.learner, b.learner);
+  swap(a.signature, b.signature);
+  swap(a.last_committed_decree_in_app, b.last_committed_decree_in_app);
+  swap(a.last_committed_decree_in_prepare_list, b.last_committed_decree_in_prepare_list);
+  swap(a.app_specific_learn_request, b.app_specific_learn_request);
+}
+
+learn_request::learn_request(const learn_request& other22) {
+  gpid = other22.gpid;
+  learner = other22.learner;
+  signature = other22.signature;
+  last_committed_decree_in_app = other22.last_committed_decree_in_app;
+  last_committed_decree_in_prepare_list = other22.last_committed_decree_in_prepare_list;
+  app_specific_learn_request = other22.app_specific_learn_request;
+}
+learn_request& learn_request::operator=(const learn_request& other23) {
+  gpid = other23.gpid;
+  learner = other23.learner;
+  signature = other23.signature;
+  last_committed_decree_in_app = other23.last_committed_decree_in_app;
+  last_committed_decree_in_prepare_list = other23.last_committed_decree_in_prepare_list;
+  app_specific_learn_request = other23.app_specific_learn_request;
+  return *this;
+}
+// learn_response
+learn_response::~learn_response() throw() {
+}
+
+
+void swap(learn_response &a, learn_response &b) {
+  using ::std::swap;
+  swap(a.err, b.err);
+  swap(a.config, b.config);
+  swap(a.commit_decree, b.commit_decree);
+  swap(a.prepare_start_decree, b.prepare_start_decree);
+  swap(a.state, b.state);
+  swap(a.base_local_dir, b.base_local_dir);
+}
+
+learn_response::learn_response(const learn_response& other24) {
+  err = other24.err;
+  config = other24.config;
+  commit_decree = other24.commit_decree;
+  prepare_start_decree = other24.prepare_start_decree;
+  state = other24.state;
+  base_local_dir = other24.base_local_dir;
+}
+learn_response& learn_response::operator=(const learn_response& other25) {
+  err = other25.err;
+  config = other25.config;
+  commit_decree = other25.commit_decree;
+  prepare_start_decree = other25.prepare_start_decree;
+  state = other25.state;
+  base_local_dir = other25.base_local_dir;
+  return *this;
+}
+// group_check_request
+group_check_request::~group_check_request() throw() {
+}
+
+
+void swap(group_check_request &a, group_check_request &b) {
+  using ::std::swap;
+  swap(a.app_type, b.app_type);
+  swap(a.node, b.node);
+  swap(a.config, b.config);
+  swap(a.last_committed_decree, b.last_committed_decree);
+  swap(a.learner_signature, b.learner_signature);
+}
+
+group_check_request::group_check_request(const group_check_request& other26) {
+  app_type = other26.app_type;
+  node = other26.node;
+  config = other26.config;
+  last_committed_decree = other26.last_committed_decree;
+  learner_signature = other26.learner_signature;
+}
+group_check_request& group_check_request::operator=(const group_check_request& other27) {
+  app_type = other27.app_type;
+  node = other27.node;
+  config = other27.config;
+  last_committed_decree = other27.last_committed_decree;
+  learner_signature = other27.learner_signature;
+  return *this;
+}
+// group_check_response
+group_check_response::~group_check_response() throw() {
+}
+
+
+void swap(group_check_response &a, group_check_response &b) {
+  using ::std::swap;
+  swap(a.gpid, b.gpid);
+  swap(a.err, b.err);
+  swap(a.last_committed_decree_in_app, b.last_committed_decree_in_app);
+  swap(a.last_committed_decree_in_prepare_list, b.last_committed_decree_in_prepare_list);
+  swap(a.learner_status_, b.learner_status_);
+  swap(a.learner_signature, b.learner_signature);
+  swap(a.node, b.node);
+}
+
+group_check_response::group_check_response(const group_check_response& other28) {
+  gpid = other28.gpid;
+  err = other28.err;
+  last_committed_decree_in_app = other28.last_committed_decree_in_app;
+  last_committed_decree_in_prepare_list = other28.last_committed_decree_in_prepare_list;
+  learner_status_ = other28.learner_status_;
+  learner_signature = other28.learner_signature;
+  node = other28.node;
+}
+group_check_response& group_check_response::operator=(const group_check_response& other29) {
+  gpid = other29.gpid;
+  err = other29.err;
+  last_committed_decree_in_app = other29.last_committed_decree_in_app;
+  last_committed_decree_in_prepare_list = other29.last_committed_decree_in_prepare_list;
+  learner_status_ = other29.learner_status_;
+  learner_signature = other29.learner_signature;
+  node = other29.node;
+  return *this;
+}
+// meta_request_header
+meta_request_header::~meta_request_header() throw() {
+}
+
+
+void swap(meta_request_header &a, meta_request_header &b) {
+  using ::std::swap;
+  swap(a.rpc_tag, b.rpc_tag);
+}
+
+meta_request_header::meta_request_header(const meta_request_header& other30) {
+  rpc_tag = other30.rpc_tag;
+}
+meta_request_header& meta_request_header::operator=(const meta_request_header& other31) {
+  rpc_tag = other31.rpc_tag;
+  return *this;
+}
+// meta_response_header
+meta_response_header::~meta_response_header() throw() {
+}
+
+
+void swap(meta_response_header &a, meta_response_header &b) {
+  using ::std::swap;
+  swap(a.err, b.err);
+  swap(a.primary_address, b.primary_address);
+}
+
+meta_response_header::meta_response_header(const meta_response_header& other32) {
+  err = other32.err;
+  primary_address = other32.primary_address;
+}
+meta_response_header& meta_response_header::operator=(const meta_response_header& other33) {
+  err = other33.err;
+  primary_address = other33.primary_address;
+  return *this;
+}
+// configuration_update_request
+configuration_update_request::~configuration_update_request() throw() {
+}
+
+
+void swap(configuration_update_request &a, configuration_update_request &b) {
+  using ::std::swap;
+  swap(a.config, b.config);
+  swap(a.type, b.type);
+  swap(a.node, b.node);
+}
+
+configuration_update_request::configuration_update_request(const configuration_update_request& other34) {
+  config = other34.config;
+  type = other34.type;
+  node = other34.node;
+}
+configuration_update_request& configuration_update_request::operator=(const configuration_update_request& other35) {
+  config = other35.config;
+  type = other35.type;
+  node = other35.node;
+  return *this;
+}
+// configuration_update_response
+configuration_update_response::~configuration_update_response() throw() {
+}
+
+
+void swap(configuration_update_response &a, configuration_update_response &b) {
+  using ::std::swap;
+  swap(a.err, b.err);
+  swap(a.config, b.config);
+}
+
+configuration_update_response::configuration_update_response(const configuration_update_response& other36) {
+  err = other36.err;
+  config = other36.config;
+}
+configuration_update_response& configuration_update_response::operator=(const configuration_update_response& other37) {
+  err = other37.err;
+  config = other37.config;
+  return *this;
+}
+// configuration_proposal_request
+configuration_proposal_request::~configuration_proposal_request() throw() {
+}
+
+
+void swap(configuration_proposal_request &a, configuration_proposal_request &b) {
+  using ::std::swap;
+  swap(a.config, b.config);
+  swap(a.type, b.type);
+  swap(a.node, b.node);
+  swap(a.is_clean_data, b.is_clean_data);
+  swap(a.is_upgrade, b.is_upgrade);
+}
+
+configuration_proposal_request::configuration_proposal_request(const configuration_proposal_request& other38) {
+  config = other38.config;
+  type = other38.type;
+  node = other38.node;
+  is_clean_data = other38.is_clean_data;
+  is_upgrade = other38.is_upgrade;
+}
+configuration_proposal_request& configuration_proposal_request::operator=(const configuration_proposal_request& other39) {
+  config = other39.config;
+  type = other39.type;
+  node = other39.node;
+  is_clean_data = other39.is_clean_data;
+  is_upgrade = other39.is_upgrade;
+  return *this;
+}
+// configuration_query_by_node_request
+configuration_query_by_node_request::~configuration_query_by_node_request() throw() {
+}
+
+
+void swap(configuration_query_by_node_request &a, configuration_query_by_node_request &b) {
+  using ::std::swap;
+  swap(a.node, b.node);
+}
+
+configuration_query_by_node_request::configuration_query_by_node_request(const configuration_query_by_node_request& other40) {
+  node = other40.node;
+}
+configuration_query_by_node_request& configuration_query_by_node_request::operator=(const configuration_query_by_node_request& other41) {
+  node = other41.node;
+  return *this;
+}
+// configuration_query_by_node_response
+configuration_query_by_node_response::~configuration_query_by_node_response() throw() {
+}
+
+
+void swap(configuration_query_by_node_response &a, configuration_query_by_node_response &b) {
+  using ::std::swap;
+  swap(a.err, b.err);
+  swap(a.partitions, b.partitions);
+}
+
+configuration_query_by_node_response::configuration_query_by_node_response(const configuration_query_by_node_response& other42) {
+  err = other42.err;
+  partitions = other42.partitions;
+}
+configuration_query_by_node_response& configuration_query_by_node_response::operator=(const configuration_query_by_node_response& other43) {
+  err = other43.err;
+  partitions = other43.partitions;
+  return *this;
+}
+// configuration_query_by_index_request
+configuration_query_by_index_request::~configuration_query_by_index_request() throw() {
+}
+
+
+void swap(configuration_query_by_index_request &a, configuration_query_by_index_request &b) {
+  using ::std::swap;
+  swap(a.app_name, b.app_name);
+  swap(a.partition_indices, b.partition_indices);
+}
+
+configuration_query_by_index_request::configuration_query_by_index_request(const configuration_query_by_index_request& other44) {
+  app_name = other44.app_name;
+  partition_indices = other44.partition_indices;
+}
+configuration_query_by_index_request& configuration_query_by_index_request::operator=(const configuration_query_by_index_request& other45) {
+  app_name = other45.app_name;
+  partition_indices = other45.partition_indices;
+  return *this;
+}
+// configuration_query_by_index_response
+configuration_query_by_index_response::~configuration_query_by_index_response() throw() {
+}
+
+
+void swap(configuration_query_by_index_response &a, configuration_query_by_index_response &b) {
+  using ::std::swap;
+  swap(a.err, b.err);
+  swap(a.partitions, b.partitions);
+}
+
+configuration_query_by_index_response::configuration_query_by_index_response(const configuration_query_by_index_response& other46) {
+  err = other46.err;
+  partitions = other46.partitions;
+}
+configuration_query_by_index_response& configuration_query_by_index_response::operator=(const configuration_query_by_index_response& other47) {
+  err = other47.err;
+  partitions = other47.partitions;
+  return *this;
+}
+// query_replica_decree_request
+query_replica_decree_request::~query_replica_decree_request() throw() {
+}
+
+
+void swap(query_replica_decree_request &a, query_replica_decree_request &b) {
+  using ::std::swap;
+  swap(a.gpid, b.gpid);
+  swap(a.node, b.node);
+}
+
+query_replica_decree_request::query_replica_decree_request(const query_replica_decree_request& other48) {
+  gpid = other48.gpid;
+  node = other48.node;
+}
+query_replica_decree_request& query_replica_decree_request::operator=(const query_replica_decree_request& other49) {
+  gpid = other49.gpid;
+  node = other49.node;
+  return *this;
+}
+// query_replica_decree_response
+query_replica_decree_response::~query_replica_decree_response() throw() {
+}
+
+
+void swap(query_replica_decree_response &a, query_replica_decree_response &b) {
+  using ::std::swap;
+  swap(a.err, b.err);
+  swap(a.last_decree, b.last_decree);
+}
+
+query_replica_decree_response::query_replica_decree_response(const query_replica_decree_response& other50) {
+  err = other50.err;
+  last_decree = other50.last_decree;
+}
+query_replica_decree_response& query_replica_decree_response::operator=(const query_replica_decree_response& other51) {
+  err = other51.err;
+  last_decree = other51.last_decree;
+  return *this;
+}
+}} // namespace
diff --git a/src/apps/replication/client_lib/rpc_replicated.cpp b/src/apps/replication/client_lib/rpc_replicated.cpp
new file mode 100644
index 0000000000..6e753b0966
--- /dev/null
+++ b/src/apps/replication/client_lib/rpc_replicated.cpp
@@ -0,0 +1,159 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015 Microsoft Corporation
+ *
+ * -=- Robust Distributed System Nucleus (rDSN) -=-
+ *
+ * Permission is hereby granted, free of charge, to any person
obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+# include "replication_common.h"
+# include "rpc_replicated.h"
+
+using namespace dsn::replication;
+
+namespace dsn {
+    namespace service {
+        namespace rpc {
+
+            namespace rpc_replicated_impl {
+
+                // State shared by every attempt of one replicated call.
+                // Allocated in call_replicated() and deleted by
+                // internal_rpc_reply_callback() once the call is finished.
+                struct params
+                {
+                    std::vector<end_point> servers;       // candidates used when a retry is needed
+                    rpc_response_task_ptr response_task;  // response task reused for every attempt
+                    rpc_reply_handler callback;           // user callback; may be null
+                };
+
+                // Chooses the server to contact after currentServer:
+                //  - servers is empty: currentServer is returned unchanged
+                //  - currentServer is found in servers: its successor, wrapping to the first entry
+                //  - otherwise (including end_point::INVALID): a pseudo-random entry
+                static end_point get_next_server(const end_point& currentServer, const std::vector<end_point>& servers)
+                {
+                    // the modulo below would divide by zero on an empty list
+                    if (servers.empty())
+                    {
+                        return currentServer;
+                    }
+
+                    if (currentServer == end_point::INVALID)
+                    {
+                        return servers[env::random32(0, static_cast<int>(servers.size()) * 13) % static_cast<int>(servers.size())];
+                    }
+                    else
+                    {
+                        auto it = std::find(servers.begin(), servers.end(), currentServer);
+                        if (it != servers.end())
+                        {
+                            ++it;
+                            return it == servers.end() ? *servers.begin() : *it;
+                        }
+                        else
+                        {
+                            return servers[env::random32(0, static_cast<int>(servers.size()) * 13) % static_cast<int>(servers.size())];
+                        }
+                    }
+                }
+
+                // Reply handler bound to the response task created in call_replicated().
+                // It either finishes the call (invokes ps->callback when set, then deletes ps)
+                // or sends the same request again:
+                //  - err is set                         : finish, passing err to the callback
+                //  - ERR_SERVICE_NOT_ACTIVE / ERR_BUSY  : retry on the next entry of ps->servers
+                //  - ERR_TALK_TO_OTHERS                 : retry on header.primary_address
+                //  - any other header.err               : finish, passing the response to the callback
+                // No back-off or retry limit is applied here.
+                static void internal_rpc_reply_callback(error_code err, message_ptr& request, message_ptr& response, params* ps)
+                {
+                    //printf ("%s\n", __FUNCTION__);
+
+                    end_point next_server;
+                    bool retry = false;
+
+                    if (!err)
+                    {
+                        meta_response_header header;
+                        unmarshall(response->reader(), header);
+
+                        if (header.err == ERR_SERVICE_NOT_ACTIVE || header.err == ERR_BUSY)
+                        {
+                            // The replying server cannot serve the request now. Move on to
+                            // another candidate; previously next_server was left
+                            // default-constructed and the request was re-sent to it.
+                            next_server = get_next_server(response->header().from_address, ps->servers);
+                            retry = true;
+                        }
+                        else if (header.err == ERR_TALK_TO_OTHERS)
+                        {
+                            next_server = header.primary_address;
+                            retry = true;
+                        }
+                    }
+
+                    if (!retry)
+                    {
+                        if (nullptr != ps->callback)
+                        {
+                            (ps->callback)(err, request, response);
+                        }
+                        delete ps;
+                        return;
+                    }
+
+                    rpc::call(
+                        next_server,
+                        request,
+                        ps->response_task
+                        );
+                }
+
+
+            } // end namespace rpc_replicated_impl
+
+            // Sends request to one of a group of servers and keeps re-sending it, through
+            // rpc_replicated_impl::internal_rpc_reply_callback, until a final reply or a
+            // transport error is seen; callback (if set) is then invoked exactly once.
+            //
+            //  first_server : server to try first; when it equals end_point::INVALID one is
+            //                 picked from servers (an empty list leaves it INVALID)
+            //  servers      : candidates used for the first pick and for retries
+            //  request      : message to send; the same message is reused for retries
+            //  svc, reply_hash : forwarded to the response task
+            //  callback     : receives (error, request, response) of the final attempt
+            //
+            // Returns the task returned by rpc::call for the first attempt.
+            rpc_response_task_ptr call_replicated(
+                const end_point& first_server,
+                const std::vector<end_point>& servers,
+                message_ptr& request,
+
+                // reply
+                servicelet* svc,
+                rpc_reply_handler callback,
+                int reply_hash
+                )
+            {
+                end_point first = first_server;
+                if (first == end_point::INVALID)
+                {
+                    first = rpc_replicated_impl::get_next_server(first_server, servers);
+                }
+
+                // owned by the reply callback from here on; see internal_rpc_reply_callback
+                rpc_replicated_impl::params *ps = new rpc_replicated_impl::params;
+                ps->servers = servers;
+                ps->callback = callback;
+
+                std::function<void(error_code, message_ptr&, message_ptr&)> cb = std::bind(
+                    &rpc_replicated_impl::internal_rpc_reply_callback,
+                    std::placeholders::_1,
+                    std::placeholders::_2,
+                    std::placeholders::_3,
+                    ps
+                    );
+
+                ps->response_task = new internal_use_only::service_rpc_response_task4(
+                    svc,
+                    cb,
+                    request,
+                    reply_hash
+                    );
+
+                return rpc::call(
+                    first,
+                    request,
+                    ps->response_task
+                    );
+            }
+        }
+    }
+} // end namespace
diff --git a/src/apps/replication/client_lib/rpc_replicated.h b/src/apps/replication/client_lib/rpc_replicated.h
new file mode 100644
index
0000000000..46775480d9 --- /dev/null +++ b/src/apps/replication/client_lib/rpc_replicated.h @@ -0,0 +1,123 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#pragma once + +#include + +namespace dsn { + namespace service { + namespace rpc { + + template + rpc_response_task_ptr call_typed_replicated( + // servers + const end_point& first_server, + const std::vector& servers, + // request + task_code code, + std::shared_ptr& req, + + // callback + servicelet* owner, + std::function&, std::shared_ptr&, const end_point&)> callback, + int request_hash = 0, + int timeout_milliseconds = 0, + int reply_hash = 0 + ); + + rpc_response_task_ptr call_replicated( + const end_point& first_server, + const std::vector& servers, + message_ptr& request, + + // reply + servicelet* svc, + rpc_reply_handler callback, + int reply_hash = 0 + ); + // ---------------- inline implementation ------------------- + + namespace internal_use_only + { + template + inline void rpc_replicated_callback( + error_code code, + message_ptr& request, + message_ptr& response, + std::shared_ptr& req, + std::function&, std::shared_ptr&, const end_point&)> callback + ) + { + end_point srv = end_point::INVALID; + std::shared_ptr resp(nullptr); + if (code == ERR_SUCCESS) + { + srv = response->header().from_address; + resp.reset(new TResponse); + unmarshall(response->reader(), *resp); + } + callback(code, req, resp, srv); + } + } + + template + inline rpc_response_task_ptr call_typed_replicated( + // servers + const end_point& first_server, + const std::vector& servers, + // request + task_code code, + std::shared_ptr& req, + + // callback + servicelet* owner, + std::function&, std::shared_ptr&, const end_point&)> callback, + int request_hash, + int timeout_milliseconds, + int reply_hash + ) + { + message_ptr request = message::create_request(code, timeout_milliseconds, request_hash); + marshall(request->writer(), *req); + + return call_replicated( + first_server, + servers, + request, + owner, + std::bind(&internal_use_only::rpc_replicated_callback, + std::placeholders::_1, + std::placeholders::_2, + std::placeholders::_3, + req, + callback + ), + reply_hash 
+ ); + } + } // end rpc + } // end service +} // end namespace dsn diff --git a/src/apps/replication/exe/CMakeLists.txt b/src/apps/replication/exe/CMakeLists.txt new file mode 100644 index 0000000000..cf5b3b23c2 --- /dev/null +++ b/src/apps/replication/exe/CMakeLists.txt @@ -0,0 +1,4 @@ +set(DSN_APP_TARGET "dsn.replication.simple_kv") +set(DSN_EXTRA_LIBS dsn.replication dsn.replication.meta_server dsn.replication.clientlib dsn.failure_detector) +file(GLOB BINPLACE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/config.ini" "${CMAKE_CURRENT_SOURCE_DIR}/test.cmd") +dsn_add_executable(${DSN_APP_TARGET} "${BINPLACE_FILES}") diff --git a/src/apps/replication/exe/config.ini b/src/apps/replication/exe/config.ini new file mode 100644 index 0000000000..291403ec1b --- /dev/null +++ b/src/apps/replication/exe/config.ini @@ -0,0 +1,128 @@ +[apps.metaserver] +name = meta +type = meta +arguments = +ports = 34601 +run = true +count = 1 + +[apps.replicaserver] +name = replica +type = replica +arguments = +ports = 34801 +run = true +count = 3 + +[apps.client] +name = client +type = client +arguments = simple_kv.instance0 +run = true +count = 2 + +[core] + +tool = simulator +;tool = nativerun +;toollets = tracer +toollets = tracer, fault_injector +;toollets = tracer, profiler, fault_injector +;toollets = profiler, fault_injector +pause_on_start = false + +;logging_factory_name = dsn::tools::screen_logger + +[tools.simulator] +random_seed = -1438568420 +use_given_random_seed = false + +[network] +; how many network threads for network library(used by asio) +io_service_worker_count = 2 + +[network.34601] +; channel = network_header_format, network_provider_name, buffer_block_size +;RPC_CHANNEL_TCP = NET_HDR_DSN, dsn::tools::asio_network_provider, 65536 + +;RPC_CHANNEL_TCP = NET_HDR_THRIFT, dsn::tools::asio_network_provider, 65536 + + +; specification for each thread pool +[threadpool.default] +worker_count = 2 + +[threadpool.THREAD_POOL_DEFAULT] +name = default +partitioned = false 
+max_input_queue_length = 1024 +worker_priority = THREAD_xPRIORITY_NORMAL +worker_count = 2 + +[threadpool.THREAD_POOL_REPLICATION] +name = replication +partitioned = true +; max_input_queue_length = 8192 +worker_priority = THREAD_xPRIORITY_NORMAL + +[task.default] +is_trace = true +is_profile = true +allow_inline = false +rpc_call_channel = RPC_CHANNEL_TCP +fast_execution_in_network_thread = false +rpc_message_header_format = dsn +rpc_timeout_milliseconds = 5000 + +[task.LPC_AIO_IMMEDIATE_CALLBACK] +is_trace = false +allow_inline = false + +[task.LPC_RPC_TIMEOUT] +is_trace = false + +[task.RPC_FD_FAILURE_DETECTOR_PING] +is_trace = false + +[task.RPC_FD_FAILURE_DETECTOR_PING_ACK] +is_trace = false + +[task.LPC_BEACON_CHECK] +is_trace = false + + +[replication.meta_servers] +localhost:34601 + +[replication.app] +app_name = simple_kv.instance0 +app_type = simple_kv +partition_count = 1 +max_replica_count = 3 + +[replication] + +prepare_timeout_ms_for_secondaries = 1000 +learn_timeout_ms = 30000 +staleness_for_commit = 20 +staleness_for_start_prepare_for_potential_secondary = 110 +mutation_max_size_mb = 15 +mutation_max_pending_time_ms = 20 +mutation_2pc_min_replica_count = 2 + +preapre_list_max_size_mb = 250 +request_batch_disabled = false +group_check_internal_ms = 100000 +group_check_disabled = false +fd_disabled = false +fd_check_interval_seconds = 5 +fd_beacon_interval_seconds = 3 +fd_lease_seconds = 14 +fd_grace_seconds = 15 +working_dir = . 
+log_buffer_size_mb = 1 +log_pending_max_ms = 100 +log_file_size_mb = 32 +log_batch_write = true + +config_sync_interval_ms = 60000 diff --git a/src/apps/replication/exe/simple_kv.app.example.h b/src/apps/replication/exe/simple_kv.app.example.h new file mode 100644 index 0000000000..7d12c145aa --- /dev/null +++ b/src/apps/replication/exe/simple_kv.app.example.h @@ -0,0 +1,113 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */
+# pragma once
+# include "simple_kv.client.h"
+# include "simple_kv.server.h"
+
+namespace dsn { namespace replication { namespace application {
+// client app example
+//
+// Service app that creates a simple_kv_client in start() and then calls
+// read/write/append synchronously from a timer task, printing each returned
+// error code to std::cout.
+class simple_kv_client_app : public ::dsn::service::service_app, public virtual ::dsn::service::servicelet
+{
+public:
+    simple_kv_client_app(::dsn::service_app_spec* s)
+        : ::dsn::service::service_app(s)
+    {
+        _simple_kv_client = nullptr;
+    }
+
+    // stop() tolerates an app that was never (successfully) started.
+    ~simple_kv_client_app()
+    {
+        stop();
+    }
+
+    // argv[1] is handed to simple_kv_client as the application name.
+    // Returns ERR_INVALID_PARAMETERS when it is missing, ERR_SUCCESS otherwise.
+    virtual ::dsn::error_code start(int argc, char** argv)
+    {
+        if (argc < 2)
+            return ::dsn::ERR_INVALID_PARAMETERS;
+
+        std::vector<::dsn::end_point> meta_servers;
+        auto cf = ::dsn::service::system::config();
+        ::dsn::replication::replication_app_client_base::load_meta_servers(cf, meta_servers);
+
+        _simple_kv_client = new simple_kv_client(meta_servers, argv[1]);
+        // NOTE(review): the trailing 0, 0, 1000 are presumably hash, delay and a
+        // 1000 ms timer interval - confirm against tasking::enqueue
+        _timer = ::dsn::service::tasking::enqueue(LPC_SIMPLE_KV_TEST_TIMER, this, &simple_kv_client_app::on_test_timer, 0, 0, 1000);
+        return ::dsn::ERR_SUCCESS;
+    }
+
+    // Cancels the test timer and destroys the client. Safe to call more than
+    // once and before start().
+    virtual void stop(bool cleanup = false)
+    {
+        // _timer is only assigned on the success path of start(); the destructor
+        // calls stop() unconditionally, so it may still be null here
+        if (_timer != nullptr)
+        {
+            _timer->cancel(true);
+            _timer = nullptr;
+        }
+
+        if (_simple_kv_client != nullptr)
+        {
+            delete _simple_kv_client;
+            _simple_kv_client = nullptr;
+        }
+    }
+
+    // Timer body: one synchronous read, write and append with
+    // default-constructed requests.
+    void on_test_timer()
+    {
+        // test for service 'simple_kv'
+        {
+            std::string req;
+            //sync:
+            std::string resp;
+            auto err = _simple_kv_client->read(req, resp);
+            std::cout << "call RPC_SIMPLE_KV_SIMPLE_KV_READ end, return " << err.to_string() << std::endl;
+            //async:
+            //_simple_kv_client->begin_read(req);
+
+        }
+        {
+            ::dsn::replication::application::kv_pair req;
+            //sync:
+            int32_t resp;
+            auto err = _simple_kv_client->write(req, resp);
+            std::cout << "call RPC_SIMPLE_KV_SIMPLE_KV_WRITE end, return " << err.to_string() << std::endl;
+            //async:
+            //_simple_kv_client->begin_write(req);
+
+        }
+        {
+            ::dsn::replication::application::kv_pair req;
+            //sync:
+            int32_t resp;
+            auto err = _simple_kv_client->append(req, resp);
+            std::cout << "call RPC_SIMPLE_KV_SIMPLE_KV_APPEND end, return " << err.to_string() << std::endl;
+            //async:
+            //_simple_kv_client->begin_append(req);
+
+        }
+    }
+
+private:
+    ::dsn::task_ptr _timer;      // periodic test task; null until start() succeeds
+    ::dsn::end_point _server;
+
+    simple_kv_client *_simple_kv_client;  // owned; created in start(), deleted in stop()
+};
+
+} } }
\ No newline at end of file
diff --git a/src/apps/replication/exe/simple_kv.client.h b/src/apps/replication/exe/simple_kv.client.h
new file mode 100644
index 0000000000..d75dd39242
--- /dev/null
+++ b/src/apps/replication/exe/simple_kv.client.h
@@ -0,0 +1,315 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015 Microsoft Corporation
+ *
+ * -=- Robust Distributed System Nucleus (rDSN) -=-
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */ +# pragma once +# include +# include "simple_kv.code.definition.h" +# include + +namespace dsn { namespace replication { namespace application { +class simple_kv_client + : public ::dsn::replication::replication_app_client_base +{ +public: + simple_kv_client( + const std::vector& meta_servers, + const char* app_name) + : ::dsn::replication::replication_app_client_base(meta_servers, app_name) + { + } + + virtual ~simple_kv_client() {} + + // from requests to partition index + // PLEASE DO RE-DEFINE THEM IN A SUB CLASS!!! + virtual int get_partition_index(const std::string& key) { return 0;}; + virtual int get_partition_index(const ::dsn::replication::application::kv_pair& key) { return 0;}; + + // ---------- call RPC_SIMPLE_KV_SIMPLE_KV_READ ------------ + // - synchronous + ::dsn::error_code read( + const std::string& key, + __out_param std::string& resp, + int timeout_milliseconds = 0 + ) + { + auto resp_task = ::dsn::replication::replication_app_client_base::read( + get_partition_index(key), + RPC_SIMPLE_KV_SIMPLE_KV_READ, + key, + nullptr, + nullptr, + nullptr, + timeout_milliseconds + ); + resp_task->wait(); + if (resp_task->error() == ::dsn::ERR_SUCCESS) + { + unmarshall(resp_task->get_response()->reader(), resp); + } + return resp_task->error(); + } + + // - asynchronous with on-stack std::string and std::string + ::dsn::rpc_response_task_ptr begin_read( + const std::string& key, + void* context = nullptr, + int timeout_milliseconds = 0, + int reply_hash = 0 + ) + { + return ::dsn::replication::replication_app_client_base::read( + get_partition_index(key), + RPC_SIMPLE_KV_SIMPLE_KV_READ, + key, + this, + &simple_kv_client::end_read, + context, + timeout_milliseconds, + reply_hash + ); + } + + virtual void end_read( + ::dsn::error_code err, + const std::string& resp, + void* context) + { + if (err != ::dsn::ERR_SUCCESS) std::cout << "reply RPC_SIMPLE_KV_SIMPLE_KV_READ err : " << err.to_string() << std::endl; + else + { + std::cout << "reply 
RPC_SIMPLE_KV_SIMPLE_KV_READ ok" << std::endl; + } + } + + // - asynchronous with on-heap std::shared_ptr and std::shared_ptr + ::dsn::rpc_response_task_ptr begin_read2( + std::shared_ptr& key, + int timeout_milliseconds = 0, + int reply_hash = 0 + ) + { + return ::dsn::replication::replication_app_client_base::read( + get_partition_index(*key), + RPC_SIMPLE_KV_SIMPLE_KV_READ, + key, + this, + &simple_kv_client::end_read2, + timeout_milliseconds, + reply_hash + ); + } + + virtual void end_read2( + ::dsn::error_code err, + std::shared_ptr& key, + std::shared_ptr& resp) + { + if (err != ::dsn::ERR_SUCCESS) std::cout << "reply RPC_SIMPLE_KV_SIMPLE_KV_READ err : " << err.to_string() << std::endl; + else + { + std::cout << "reply RPC_SIMPLE_KV_SIMPLE_KV_READ ok" << std::endl; + } + } + + + // ---------- call RPC_SIMPLE_KV_SIMPLE_KV_WRITE ------------ + // - synchronous + ::dsn::error_code write( + const ::dsn::replication::application::kv_pair& pr, + __out_param int32_t& resp, + int timeout_milliseconds = 0 + ) + { + auto resp_task = ::dsn::replication::replication_app_client_base::write<::dsn::replication::application::kv_pair, int32_t>( + get_partition_index(pr), + RPC_SIMPLE_KV_SIMPLE_KV_WRITE, + pr, + nullptr, + nullptr, + nullptr, + timeout_milliseconds + ); + resp_task->wait(); + if (resp_task->error() == ::dsn::ERR_SUCCESS) + { + unmarshall(resp_task->get_response()->reader(), resp); + } + return resp_task->error(); + } + + // - asynchronous with on-stack ::dsn::replication::application::kv_pair and int32_t + ::dsn::rpc_response_task_ptr begin_write( + const ::dsn::replication::application::kv_pair& pr, + void* context = nullptr, + int timeout_milliseconds = 0, + int reply_hash = 0 + ) + { + return ::dsn::replication::replication_app_client_base::write( + get_partition_index(pr), + RPC_SIMPLE_KV_SIMPLE_KV_WRITE, + pr, + this, + &simple_kv_client::end_write, + context, + timeout_milliseconds, + reply_hash + ); + } + + virtual void end_write( + ::dsn::error_code 
err, + const int32_t& resp, + void* context) + { + if (err != ::dsn::ERR_SUCCESS) std::cout << "reply RPC_SIMPLE_KV_SIMPLE_KV_WRITE err : " << err.to_string() << std::endl; + else + { + std::cout << "reply RPC_SIMPLE_KV_SIMPLE_KV_WRITE ok" << std::endl; + } + } + + // - asynchronous with on-heap std::shared_ptr<::dsn::replication::application::kv_pair> and std::shared_ptr + ::dsn::rpc_response_task_ptr begin_write2( + std::shared_ptr<::dsn::replication::application::kv_pair>& pr, + int timeout_milliseconds = 0, + int reply_hash = 0 + ) + { + return ::dsn::replication::replication_app_client_base::write( + get_partition_index(*pr), + RPC_SIMPLE_KV_SIMPLE_KV_WRITE, + pr, + this, + &simple_kv_client::end_write2, + timeout_milliseconds, + reply_hash + ); + } + + virtual void end_write2( + ::dsn::error_code err, + std::shared_ptr<::dsn::replication::application::kv_pair>& pr, + std::shared_ptr& resp) + { + if (err != ::dsn::ERR_SUCCESS) std::cout << "reply RPC_SIMPLE_KV_SIMPLE_KV_WRITE err : " << err.to_string() << std::endl; + else + { + std::cout << "reply RPC_SIMPLE_KV_SIMPLE_KV_WRITE ok" << std::endl; + } + } + + + // ---------- call RPC_SIMPLE_KV_SIMPLE_KV_APPEND ------------ + // - synchronous + ::dsn::error_code append( + const ::dsn::replication::application::kv_pair& pr, + __out_param int32_t& resp, + int timeout_milliseconds = 0 + ) + { + auto resp_task = ::dsn::replication::replication_app_client_base::write<::dsn::replication::application::kv_pair, int32_t>( + get_partition_index(pr), + RPC_SIMPLE_KV_SIMPLE_KV_APPEND, + pr, + nullptr, + nullptr, + nullptr, + timeout_milliseconds + ); + resp_task->wait(); + if (resp_task->error() == ::dsn::ERR_SUCCESS) + { + unmarshall(resp_task->get_response()->reader(), resp); + } + return resp_task->error(); + } + + // - asynchronous with on-stack ::dsn::replication::application::kv_pair and int32_t + ::dsn::rpc_response_task_ptr begin_append( + const ::dsn::replication::application::kv_pair& pr, + void* context = nullptr, 
+ int timeout_milliseconds = 0, + int reply_hash = 0 + ) + { + return ::dsn::replication::replication_app_client_base::write( + get_partition_index(pr), + RPC_SIMPLE_KV_SIMPLE_KV_APPEND, + pr, + this, + &simple_kv_client::end_append, + context, + timeout_milliseconds, + reply_hash + ); + } + + virtual void end_append( + ::dsn::error_code err, + const int32_t& resp, + void* context) + { + if (err != ::dsn::ERR_SUCCESS) std::cout << "reply RPC_SIMPLE_KV_SIMPLE_KV_APPEND err : " << err.to_string() << std::endl; + else + { + std::cout << "reply RPC_SIMPLE_KV_SIMPLE_KV_APPEND ok" << std::endl; + } + } + + // - asynchronous with on-heap std::shared_ptr<::dsn::replication::application::kv_pair> and std::shared_ptr + ::dsn::rpc_response_task_ptr begin_append2( + std::shared_ptr<::dsn::replication::application::kv_pair>& pr, + int timeout_milliseconds = 0, + int reply_hash = 0 + ) + { + return ::dsn::replication::replication_app_client_base::write( + get_partition_index(*pr), + RPC_SIMPLE_KV_SIMPLE_KV_APPEND, + pr, + this, + &simple_kv_client::end_append2, + timeout_milliseconds, + reply_hash + ); + } + + virtual void end_append2( + ::dsn::error_code err, + std::shared_ptr<::dsn::replication::application::kv_pair>& pr, + std::shared_ptr& resp) + { + if (err != ::dsn::ERR_SUCCESS) std::cout << "reply RPC_SIMPLE_KV_SIMPLE_KV_APPEND err : " << err.to_string() << std::endl; + else + { + std::cout << "reply RPC_SIMPLE_KV_SIMPLE_KV_APPEND ok" << std::endl; + } + } + +}; + +} } } \ No newline at end of file diff --git a/src/apps/replication/exe/simple_kv.client.impl.cpp b/src/apps/replication/exe/simple_kv.client.impl.cpp new file mode 100644 index 0000000000..b50f6371fd --- /dev/null +++ b/src/apps/replication/exe/simple_kv.client.impl.cpp @@ -0,0 +1,58 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of 
this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "simple_kv.client.impl.h" + +namespace dsn { + namespace replication { + namespace application { + + + simple_kv_client_impl::simple_kv_client_impl(const std::vector& meta_servers) + : simple_kv_client(meta_servers, "simple_kv") + { + } + + + simple_kv_client_impl::~simple_kv_client_impl(void) + { + + } + + int simple_kv_client_impl::get_partition_index(const std::string& key) + { + // TODO: + return 0; + } + + int simple_kv_client_impl::get_partition_index(const kv_pair& pr) + { + // TODO: + return 0; + } + } + } +} + diff --git a/src/apps/replication/exe/simple_kv.client.impl.h b/src/apps/replication/exe/simple_kv.client.impl.h new file mode 100644 index 0000000000..eb35f21c69 --- /dev/null +++ b/src/apps/replication/exe/simple_kv.client.impl.h @@ -0,0 +1,47 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person 
obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */
+#pragma once
+
+#include "simple_kv.client.h"
+
+namespace dsn {
+    namespace replication {
+        namespace application {
+
+            // Concrete simple_kv client that supplies the request-to-partition
+            // mapping which simple_kv_client asks sub classes to re-define.
+            class simple_kv_client_impl : public simple_kv_client
+            {
+            public:
+                simple_kv_client_impl(const std::vector<end_point>& meta_servers);
+                ~simple_kv_client_impl(void);
+
+            protected:
+                // 'override' makes the compiler reject these declarations if their
+                // signatures ever stop matching the virtuals in simple_kv_client.
+                virtual int get_partition_index(const std::string& key) override;
+                virtual int get_partition_index(const kv_pair& pr) override;
+            };
+
+        }
+    }
+}// namespace
diff --git a/src/apps/replication/exe/simple_kv.code.definition.h b/src/apps/replication/exe/simple_kv.code.definition.h
new file mode 100644
index 0000000000..5fb4de99fc
--- /dev/null
+++ b/src/apps/replication/exe/simple_kv.code.definition.h
@@ -0,0 +1,37 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015 Microsoft Corporation
+ *
+ * -=- Robust Distributed System Nucleus (rDSN) -=-
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */ +# pragma once +# include +# include "simple_kv.types.h" + +namespace dsn { namespace replication { namespace application { + // define RPC task code for service 'simple_kv' + DEFINE_TASK_CODE_RPC(RPC_SIMPLE_KV_SIMPLE_KV_READ, ::dsn::TASK_PRIORITY_COMMON, THREAD_POOL_DEFAULT) + DEFINE_TASK_CODE_RPC(RPC_SIMPLE_KV_SIMPLE_KV_WRITE, ::dsn::TASK_PRIORITY_COMMON, THREAD_POOL_DEFAULT) + DEFINE_TASK_CODE_RPC(RPC_SIMPLE_KV_SIMPLE_KV_APPEND, ::dsn::TASK_PRIORITY_COMMON, THREAD_POOL_DEFAULT) + // test timer task code + DEFINE_TASK_CODE(LPC_SIMPLE_KV_TEST_TIMER, ::dsn::TASK_PRIORITY_COMMON, THREAD_POOL_DEFAULT) +} } } diff --git a/src/apps/replication/exe/simple_kv.main.cpp b/src/apps/replication/exe/simple_kv.main.cpp new file mode 100644 index 0000000000..e8288e2601 --- /dev/null +++ b/src/apps/replication/exe/simple_kv.main.cpp @@ -0,0 +1,62 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +// apps +# include "simple_kv.app.example.h" +# include "simple_kv.server.impl.h" + +// tools +# include +# include +# include +# include +# include + +int main(int argc, char** argv) +{ + // register replication application provider + dsn::replication::register_replica_provider<::dsn::replication::application::simple_kv_service_impl>("simple_kv"); + + // register all possible services + dsn::service::system::register_service<::dsn::replication::meta_service_app>("meta"); + dsn::service::system::register_service<::dsn::replication::replication_service_app>("replica"); + dsn::service::system::register_service<::dsn::replication::application::simple_kv_client_app>("client"); + + // register all possible tools and toollets + dsn::tools::register_tool("nativerun"); + dsn::tools::register_tool("simulator"); + dsn::tools::register_toollet("tracer"); + dsn::tools::register_toollet("profiler"); + dsn::tools::register_toollet("fault_injector"); + + // register necessary components +#ifdef DSN_NOT_USE_DEFAULT_SERIALIZATION + dsn::tools::register_component_provider<::dsn::thrift_binary_message_parser>("thrift"); +#endif + + // specify what services and tools will run in config file, then run + dsn::service::system::run("config.ini", true); + return 0; +} diff --git a/src/apps/replication/exe/simple_kv.server.h b/src/apps/replication/exe/simple_kv.server.h new file mode 100644 index 0000000000..68371cc1fd --- /dev/null +++ b/src/apps/replication/exe/simple_kv.server.h @@ -0,0 +1,87 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy 
+ * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# pragma once +# include +# include "simple_kv.code.definition.h" +# include + +namespace dsn { namespace replication { namespace application { +class simple_kv_service + : public ::dsn::replication::replication_app_base +{ +public: + simple_kv_service(::dsn::replication::replica* replica, ::dsn::configuration_ptr& config) + : ::dsn::replication::replication_app_base(replica, config) + { + open_service(); + } + + virtual ~simple_kv_service() + { + close_service(); + } + +protected: + // all service handlers to be implemented further + // RPC_SIMPLE_KV_SIMPLE_KV_READ + virtual void on_read(const std::string& key, ::dsn::service::rpc_replier& reply) + { + std::cout << "... exec RPC_SIMPLE_KV_SIMPLE_KV_READ ... (not implemented) " << std::endl; + std::string resp; + reply(resp); + } + // RPC_SIMPLE_KV_SIMPLE_KV_WRITE + virtual void on_write(const ::dsn::replication::application::kv_pair& pr, ::dsn::service::rpc_replier& reply) + { + std::cout << "... 
exec RPC_SIMPLE_KV_SIMPLE_KV_WRITE ... (not implemented) " << std::endl; + int32_t resp; + reply(resp); + } + // RPC_SIMPLE_KV_SIMPLE_KV_APPEND + virtual void on_append(const ::dsn::replication::application::kv_pair& pr, ::dsn::service::rpc_replier& reply) + { + std::cout << "... exec RPC_SIMPLE_KV_SIMPLE_KV_APPEND ... (not implemented) " << std::endl; + int32_t resp; + reply(resp); + } + +public: + void open_service() + { + this->register_async_rpc_handler(RPC_SIMPLE_KV_SIMPLE_KV_READ, "read", &simple_kv_service::on_read); + this->register_async_rpc_handler(RPC_SIMPLE_KV_SIMPLE_KV_WRITE, "write", &simple_kv_service::on_write); + this->register_async_rpc_handler(RPC_SIMPLE_KV_SIMPLE_KV_APPEND, "append", &simple_kv_service::on_append); + } + + void close_service() + { + this->unregister_rpc_handler(RPC_SIMPLE_KV_SIMPLE_KV_READ); + this->unregister_rpc_handler(RPC_SIMPLE_KV_SIMPLE_KV_WRITE); + this->unregister_rpc_handler(RPC_SIMPLE_KV_SIMPLE_KV_APPEND); + } +}; + +} } } \ No newline at end of file diff --git a/src/apps/replication/exe/simple_kv.server.impl.cpp b/src/apps/replication/exe/simple_kv.server.impl.cpp new file mode 100644 index 0000000000..082e672f65 --- /dev/null +++ b/src/apps/replication/exe/simple_kv.server.impl.cpp @@ -0,0 +1,313 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the 
Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "simple_kv.server.impl.h" +#include +#include +#include + +namespace dsn { + namespace replication { + namespace application { + + simple_kv_service_impl::simple_kv_service_impl(replica* replica, configuration_ptr& cf) + : simple_kv_service(replica, cf) + { + _learn_file_name.clear(); + } + + // RPC_SIMPLE_KV_READ + void simple_kv_service_impl::on_read(const std::string& key, ::dsn::service::rpc_replier& reply) + { + zauto_lock l(_lock); + + auto it = _store.find(key); + if (it == _store.end()) + { + reply(""); + } + else + { + reply(it->second); + } + } + + // RPC_SIMPLE_KV_WRITE + void simple_kv_service_impl::on_write(const kv_pair& pr, ::dsn::service::rpc_replier& reply) + { + zauto_lock l(_lock); + _store[pr.key] = pr.value; + reply(ERR_SUCCESS); + } + + // RPC_SIMPLE_KV_APPEND + void simple_kv_service_impl::on_append(const kv_pair& pr, ::dsn::service::rpc_replier& reply) + { + zauto_lock l(_lock); + auto it = _store.find(pr.key); + if (it != _store.end()) + it->second.append(pr.value); + else + _store[pr.key] = pr.value; + reply(ERR_SUCCESS); + } + + int simple_kv_service_impl::open(bool create_new) + { + zauto_lock l(_lock); + if (create_new) + { + boost::filesystem::remove_all(dir()); + boost::filesystem::create_directory(dir()); + } + else + { + recover(); + } + return 0; + } + + int simple_kv_service_impl::close(bool clear_state) + { + zauto_lock l(_lock); + if (clear_state) + { + boost::filesystem::remove_all(dir()); + } + return 0; + } + + // 
checkpoint related + void simple_kv_service_impl::recover() + { + zauto_lock l(_lock); + + _store.clear(); + + decree maxVersion = 0; + std::string name; + boost::filesystem::directory_iterator endtr; + for (boost::filesystem::directory_iterator it(dir()); + it != endtr; + ++it) + { + auto s = it->path().filename().string(); + if (s.substr(0, strlen("checkpoint.")) != std::string("checkpoint.")) + continue; + + decree version = atol(s.substr(strlen("checkpoint.")).c_str()); + if (version > maxVersion) + { + maxVersion = version; + name = dir() + "/" + s; + } + } + + if (maxVersion > 0) + { + recover(name, maxVersion); + } + } + + void simple_kv_service_impl::recover(const std::string& name, decree version) + { + zauto_lock l(_lock); + + std::ifstream is(name.c_str()); + if (!is.is_open()) + return; + + + _store.clear(); + + uint32_t count; + is.read((char*)&count, sizeof(count)); + + for (uint32_t i = 0; i < count; i++) + { + std::string key; + std::string value; + + uint32_t sz; + is.read((char*)&sz, (uint32_t)sizeof(sz)); + key.resize(sz); + + is.read((char*)&key[0], sz); + + is.read((char*)&sz, (uint32_t)sizeof(sz)); + value.resize(sz); + + is.read((char*)&value[0], sz); + + _store[key] = value; + } + + _last_durable_decree = _last_committed_decree = version; + } + + int simple_kv_service_impl::flush(bool force) + { + zauto_lock l(_lock); + + if (last_committed_decree() == last_durable_decree()) + { + return ERR_SUCCESS; + } + + // TODO: should use async write instead + char name[256]; + sprintf(name, "%s/checkpoint.%lld", dir().c_str(), + static_cast(last_committed_decree())); + std::ofstream os(name); + + uint32_t count = (uint32_t)_store.size(); + os.write((const char*)&count, (uint32_t)sizeof(count)); + + for (auto it = _store.begin(); it != _store.end(); it++) + { + const std::string& k = it->first; + uint32_t sz = (uint32_t)k.length(); + + os.write((const char*)&sz, (uint32_t)sizeof(sz)); + os.write((const char*)&k[0], sz); + + const std::string& v = 
it->second; + sz = (uint32_t)v.length(); + + os.write((const char*)&sz, (uint32_t)sizeof(sz)); + os.write((const char*)&v[0], sz); + } + + _last_durable_decree = last_committed_decree(); + return ERR_SUCCESS; + } + + // helper routines to accelerate learning + int simple_kv_service_impl::get_learn_state(decree start, const blob& learn_req, __out_param learn_state& state) + { + ::dsn::binary_writer writer; + + zauto_lock l(_lock); + + int magic = 0xdeadbeef; + writer.write(magic); + + writer.write(_last_committed_decree.load()); + + dassert(_last_committed_decree >= 0, ""); + + int count = static_cast(_store.size()); + writer.write(count); + + for (auto it = _store.begin(); it != _store.end(); it++) + { + writer.write(it->first); + writer.write(it->second); + } + + auto bb = writer.get_buffer(); + auto buf = bb.buffer(); + + state.meta = blob(buf, static_cast(bb.data() - bb.buffer().get()), bb.length()); + + + //// Test Sample + //if (_learn_file_name.empty()) + //{ + // std::stringstream ss; + // ss << std::rand(); + // ss >> _learn_file_name; + // _learn_file_name = dir() + "/test_transfer_" + _learn_file_name + ".txt"; + // + // std::ofstream fout(_learn_file_name.c_str()); + // fout << "Test by Kit" << std::endl; + // fout.close(); + //} + // + //state.files.push_back(_learn_file_name); + + return ERR_SUCCESS; + } + + int simple_kv_service_impl::apply_learn_state(learn_state& state) + { + blob bb((const char*)state.meta.data(), 0, state.meta.length()); + + binary_reader reader(bb); + + zauto_lock l(_lock); + + _store.clear(); + + int magic; + reader.read(magic); + + dassert(magic == 0xdeadbeef, ""); + + decree decree; + reader.read(decree); + + dassert(decree >= 0, ""); + + int count; + reader.read(count); + + for (int i = 0; i < count; i++) + { + std::string key, value; + reader.read(key); + reader.read(value); + _store[key] = value; + } + + _last_committed_decree = decree; + _last_durable_decree = 0; + + flush(true); + + + bool ret = true; + for (auto itr = 
state.files.begin(); itr != state.files.end(); ++itr) + if (itr->find("test_transfer") != std::string::npos) + { + std::string fn = dir() + "/" + *itr; + ret = boost::filesystem::exists(fn); + if (!ret) break; + + std::ifstream fin(fn.c_str()); + std::string s; + getline(fin, s); + fin.close(); + ret = (s == "Test by Kit"); + // FileUtil::RemoveFile(fn.c_str()); + } + + if (ret) return ERR_SUCCESS; + else return ERR_LEARN_FILE_FALED; + } + + } + } +} // namespace diff --git a/src/apps/replication/exe/simple_kv.server.impl.h b/src/apps/replication/exe/simple_kv.server.impl.h new file mode 100644 index 0000000000..36dc775ba6 --- /dev/null +++ b/src/apps/replication/exe/simple_kv.server.impl.h @@ -0,0 +1,66 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#pragma once + +#include "simple_kv.server.h" + +namespace dsn { + namespace replication { + namespace application { + class simple_kv_service_impl : public simple_kv_service + { + public: + simple_kv_service_impl(replica* replica, configuration_ptr& config); + + // RPC_SIMPLE_KV_READ + virtual void on_read(const std::string& key, ::dsn::service::rpc_replier& reply); + // RPC_SIMPLE_KV_WRITE + virtual void on_write(const kv_pair& pr, ::dsn::service::rpc_replier& reply); + // RPC_SIMPLE_KV_APPEND + virtual void on_append(const kv_pair& pr, ::dsn::service::rpc_replier& reply); + + virtual int open(bool create_new); + virtual int close(bool clear_state); + virtual int flush(bool force); + + // helper routines to accelerate learning + virtual int get_learn_state(decree start, const blob& learn_req, __out_param learn_state& state); + virtual int apply_learn_state(learn_state& state); + + private: + void recover(); + void recover(const std::string& name, decree version); + + private: + typedef std::map simple_kv; + simple_kv _store; + zlock _lock; + std::string _learn_file_name; + }; + + } + } +} // namespace diff --git a/src/apps/replication/exe/simple_kv.thrift b/src/apps/replication/exe/simple_kv.thrift new file mode 100644 index 0000000000..63abea49f2 --- /dev/null +++ b/src/apps/replication/exe/simple_kv.thrift @@ -0,0 +1,14 @@ +namespace cpp dsn.replication.application + +struct kv_pair +{ + 1:string key; + 2:string value; +} + +service simple_kv +{ + string read(1:string key); + i32 write(2:kv_pair pr); + i32 append(2:kv_pair pr); +} diff --git a/src/apps/replication/exe/simple_kv.thrift.annotations b/src/apps/replication/exe/simple_kv.thrift.annotations new file mode 100644 index 0000000000..e06a3c06bb --- /dev/null +++ b/src/apps/replication/exe/simple_kv.thrift.annotations @@ -0,0 +1,14 @@ +; annotation format +;[type.name[[.subname]...]] +;key = vlaue + +[service.simple_kv] +stateful = true ; simple_kv is a stateful service + +[function.simple_kv.write] 
+write = true ; simple_kv.write is a write function + +[function.simple_kv.append] +write = true + + diff --git a/src/apps/replication/exe/simple_kv.types.h b/src/apps/replication/exe/simple_kv.types.h new file mode 100644 index 0000000000..5503364a6b --- /dev/null +++ b/src/apps/replication/exe/simple_kv.types.h @@ -0,0 +1,88 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# pragma once + +// +// uncomment the following line if you want to use +// data encoding/decoding from the original tool instead of rDSN +// in this case, you need to use these tools to generate +// type files with --gen=cpp etc. options +// +// !!! WARNING: not feasible for replicated service yet!!! 
+// +//# define DSN_NOT_USE_DEFAULT_SERIALIZATION + +# include + +# ifdef DSN_NOT_USE_DEFAULT_SERIALIZATION + +# include +# include "simple_kv_types.h" + +namespace dsn { namespace replication { namespace application { + // ---------- kv_pair ------------- + inline void marshall(::dsn::binary_writer& writer, const kv_pair& val) + { + boost::shared_ptr<::dsn::binary_writer_transport> transport(new ::dsn::binary_writer_transport(writer)); + ::apache::thrift::protocol::TBinaryProtocol proto(transport); + ::dsn::marshall_rpc_args(&proto, val, &kv_pair::write); + }; + + inline void unmarshall(::dsn::binary_reader& reader, __out_param kv_pair& val) + { + boost::shared_ptr<::dsn::binary_reader_transport> transport(new ::dsn::binary_reader_transport(reader)); + ::apache::thrift::protocol::TBinaryProtocol proto(transport); + ::dsn::unmarshall_rpc_args(&proto, val, &kv_pair::read); + }; + +} } } + + +# else // use rDSN's data encoding/decoding + +namespace dsn { namespace replication { namespace application { + // ---------- kv_pair ------------- + struct kv_pair + { + std::string key; + std::string value; + }; + + inline void marshall(::dsn::binary_writer& writer, const kv_pair& val) + { + marshall(writer, val.key); + marshall(writer, val.value); + }; + + inline void unmarshall(::dsn::binary_reader& reader, __out_param kv_pair& val) + { + unmarshall(reader, val.key); + unmarshall(reader, val.value); + }; + +} } } + +#endif diff --git a/src/apps/replication/exe/test.cmd b/src/apps/replication/exe/test.cmd new file mode 100644 index 0000000000..d6cd4cf29f --- /dev/null +++ b/src/apps/replication/exe/test.cmd @@ -0,0 +1,9 @@ +echo OFF +for /l %%x in (1, 1, 10) do ( + echo start test instance %%x + mkdir test-%%x + copy config.ini .\test-%%x + cd test-%%x + start ..\dsn.replication.simple_kv.exe + cd .. 
+) diff --git a/src/apps/replication/lib/CMakeLists.txt b/src/apps/replication/lib/CMakeLists.txt new file mode 100644 index 0000000000..e2b72ee022 --- /dev/null +++ b/src/apps/replication/lib/CMakeLists.txt @@ -0,0 +1,2 @@ +include_directories(AFTER ../client_lib ../../../dist/failure_detector) +dsn_add_library(dsn.replication) diff --git a/src/apps/replication/lib/mutation.cpp b/src/apps/replication/lib/mutation.cpp new file mode 100644 index 0000000000..e549ee866c --- /dev/null +++ b/src/apps/replication/lib/mutation.cpp @@ -0,0 +1,110 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include "mutation.h" + + +namespace dsn { namespace replication { + +mutation::mutation() +{ + rpc_code = 0; + _private0 = 0; + _not_logged = 1; +} + +mutation::~mutation() +{ + clear_log_task(); +} + +void mutation::set_client_request(task_code code, message_ptr& request) +{ + ::dsn::blob buffer(request->reader().get_remaining_buffer()); + auto buf = buffer.buffer(); + blob bb(buf, static_cast(buffer.data() - buffer.buffer().get()), buffer.length()); + + client_request = request; + rpc_code = code; + data.updates.push_back(bb); +} + +/*static*/ mutation_ptr mutation::read_from(message_ptr& reader) +{ + mutation_ptr mu(new mutation()); + unmarshall(reader, mu->data); + + unmarshall(reader, mu->rpc_code); + + for (auto it = mu->data.updates.begin(); it != mu->data.updates.end(); it++) + { + void * buf = malloc(it->length()); + memcpy(buf, it->data(), it->length()); + ::dsn::blob bb((const char *)buf, 0, it->length()); + message_ptr msg(new message(bb, false)); + mu->client_request = msg; + } + + mu->_from_message = reader; + sprintf (mu->_name, "%lld.%lld", + static_cast(mu->data.header.ballot), + static_cast(mu->data.header.decree)); + return mu; +} + +void mutation::write_to(message_ptr& writer) +{ + marshall(writer, data); + marshall(writer, rpc_code); +} + +int mutation::clear_prepare_or_commit_tasks() +{ + int c = 0; + for (auto it = _prepare_or_commit_tasks.begin(); it != _prepare_or_commit_tasks.end(); it++) + { + it->second->cancel(true); + c++; + } + + _prepare_or_commit_tasks.clear(); + return c; +} + +int mutation::clear_log_task() +{ + if (_log_task != nullptr) + { + _log_task->cancel(true); + _log_task = nullptr; + return 1; + } + else + { + return 0; + } +} + +}} // namespace end diff --git a/src/apps/replication/lib/mutation.h b/src/apps/replication/lib/mutation.h new file mode 100644 index 0000000000..6fb468675f --- /dev/null +++ b/src/apps/replication/lib/mutation.h @@ -0,0 +1,104 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 
Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#pragma once + + +#include "replication_common.h" +#include + +#pragma warning(disable: 4201) + +namespace dsn { namespace replication { + +class mutation : public ref_object +{ +public: + mutation(); + virtual ~mutation(); + + // state inquery + const char* name() const { return _name; } + bool is_logged() const { return _not_logged == 0; } + bool is_prepared() const { return _not_logged == 0; } + bool is_ready_for_commit() const { return _private0 == 0; } + message_ptr& owner_message() { return _from_message; } + unsigned int left_secondary_ack_count() const { return _left_secondary_ack_count; } + unsigned int left_potential_secondary_ack_count() const { return _left_potential_secondary_ack_count; } + task_ptr& log_task() { return _log_task; } + node_tasks& remote_tasks() { return _prepare_or_commit_tasks; } + + // state change + void set_id(ballot b, decree c); + void set_client_request(task_code code, message_ptr& request); + void set_logged() { dassert (!is_logged(), ""); _not_logged = 0; } + unsigned int decrease_left_secondary_ack_count() { return --_left_secondary_ack_count; } + unsigned int decrease_left_potential_secondary_ack_count() { return --_left_potential_secondary_ack_count; } + void set_left_secondary_ack_count(unsigned int count) { _left_secondary_ack_count = count; } + void set_left_potential_secondary_ack_count(unsigned int count) { _left_potential_secondary_ack_count = count; } + int clear_prepare_or_commit_tasks(); + int clear_log_task(); + + // reader & writer + static mutation_ptr read_from(message_ptr& reader); + void write_to(message_ptr& writer); + + // data + mutation_data data; + int rpc_code; + message_ptr client_request; + +private: + union + { + struct + { + unsigned int _not_logged : 1; + unsigned int _left_secondary_ack_count : 7; + unsigned int _left_potential_secondary_ack_count : 8; + }; + uint16_t _private0; + }; + + node_tasks _prepare_or_commit_tasks; + task_ptr _log_task; + + message_ptr _from_message; + char 
_name[40]; // ballot.decree +}; + +DEFINE_REF_OBJECT(mutation) + +// ---------------------- inline implementation ---------------------------- +inline void mutation::set_id(ballot b, decree c) +{ + data.header.ballot = b; + data.header.decree = c; + sprintf (_name, "%lld.%lld", static_cast(b), static_cast(c)); +} + +}} // namespace + +#pragma warning(default: 4201) diff --git a/src/apps/replication/lib/mutation_cache.cpp b/src/apps/replication/lib/mutation_cache.cpp new file mode 100644 index 0000000000..26b45b1a83 --- /dev/null +++ b/src/apps/replication/lib/mutation_cache.cpp @@ -0,0 +1,155 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include "mutation_cache.h" +#include "mutation.h" + +namespace dsn { namespace replication { + +mutation_cache::mutation_cache(decree init_decree, int max_count) +{ + _max_count = max_count; + _array.resize(max_count, nullptr); + + reset(init_decree, false); +} + +mutation_cache::~mutation_cache() +{ + _array.clear(); +} + +error_code mutation_cache::put(mutation_ptr& mu) +{ + decree decree = mu->data.header.decree; + int delta = 0, tag = 0; + if (_interval == 0) + { + delta = 1; + tag = 0; + } + else if (decree > _end_decree) + { + delta = static_cast(decree - _end_decree); + tag = 1; + } + else if (decree < _start_decree) + { + delta = static_cast(_start_decree - decree); + tag = -1; + } + + if (delta + _interval > _max_count) + { + return ERR_CAPACITY_EXCEEDED; + } + + int idx = ((decree - _end_decree) + _end_idx + _max_count) % _max_count; + mutation_ptr old = _array[idx]; + if (old != nullptr) + { + dassert (old->data.header.ballot <= mu->data.header.ballot, ""); + } + + _array[idx] = mu; + + // update tracking data + _interval += delta; + if (old != nullptr) + { + old = nullptr; + } + + if (tag > 0) + { + _end_idx = idx; + _end_decree = decree; + } + else if (tag < 0) + { + _start_idx = idx; + _start_decree = decree; + } + else if (_interval == 1) + { + _start_idx = _end_idx = idx; + _start_decree = _end_decree = decree; + } + return ERR_SUCCESS; +} + +mutation_ptr mutation_cache::pop_min() +{ + if (_interval > 0) + { + mutation_ptr mu = _array[_start_idx]; + _array[_start_idx] = nullptr; + + _interval--; + _start_idx = (_start_idx + 1) % _max_count; + + if (_interval == 0) + { + //TODO: FIXE ME LATER + //dassert (_total_size_bytes == 0, ""); + + _end_decree = _start_decree; + _end_idx = _start_idx; + } + else + { + _start_decree++; + } + return mu; + } + else + { + return nullptr; + } +} + +void mutation_cache::reset(decree init_decree, bool clear_mutations) +{ + _start_decree = _end_decree = init_decree; + _start_idx = _end_idx = 0; + _interval = 0; 
+ + if (clear_mutations) + { + for (int i = 0; i < _max_count; i++) + _array[i] = nullptr; + } +} + +mutation_ptr mutation_cache::get_mutation_by_decree(decree decree) +{ + if (decree < _start_decree || decree > _end_decree) + return nullptr; + else + return _array[(_start_idx + (decree - _start_decree) + _max_count) % _max_count]; +} + + +}} // namespace end diff --git a/src/apps/replication/lib/mutation_cache.h b/src/apps/replication/lib/mutation_cache.h new file mode 100644 index 0000000000..d70b1fc0eb --- /dev/null +++ b/src/apps/replication/lib/mutation_cache.h @@ -0,0 +1,62 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#pragma once + + +#include "replication_common.h" +#include + +namespace dsn { namespace replication { + +class mutation_cache +{ +public: + mutation_cache(decree init_decree, int max_count); + ~mutation_cache(); + + error_code put(mutation_ptr& mu); + mutation_ptr pop_min(); + mutation_ptr get_mutation_by_decree(decree decree); + void reset(decree init_decree, bool clear_mutations); + + decree min_decree() const { return _start_decree; } + decree max_decree() const { return _end_decree; } + int count() const { return _interval; } + +private: + std::vector _array; + int _max_count; + + int _interval; + + int _start_idx; + int _end_idx; + decree _start_decree; + decree _end_decree; +}; + +}} // namespace + diff --git a/src/apps/replication/lib/mutation_log.cpp b/src/apps/replication/lib/mutation_log.cpp new file mode 100644 index 0000000000..1bcc0d5a8d --- /dev/null +++ b/src/apps/replication/lib/mutation_log.cpp @@ -0,0 +1,817 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "mutation_log.h" +#include +#ifdef _WIN32 +#include +#endif + +#define __TITLE__ "mutation_log" + +namespace dsn { namespace replication { + + using namespace ::dsn::service; + +mutation_log::mutation_log(uint32_t log_buffer_size_mb, uint32_t log_pending_max_ms, uint32_t max_log_file_mb, bool batch_write, int write_task_max_count) +{ + _log_buffer_size_bytes = log_buffer_size_mb * 1024 * 1024; + _log_pending_max_milliseconds = log_pending_max_ms; + _max_log_file_size_in_bytes = ((int64_t)max_log_file_mb) * 1024L * 1024L; + _batch_write = batch_write; + _write_task_number = write_task_max_count; + + _last_file_number = 0; + _global_start_offset = 0; + _global_end_offset = 0; + + _last_log_file = nullptr; + _current_log_file = nullptr; + _dir = ""; + + _max_staleness_for_commit = 0; +} + +mutation_log::~mutation_log() +{ + close(); +} + +void mutation_log::reset() +{ + _last_file_number = 0; + _global_start_offset = 0; + _global_end_offset = 0; + + _last_log_file = nullptr; + _current_log_file = nullptr; + + for (auto it = _log_files.begin(); it != _log_files.end(); it++) + { + it->second->close(); + } + _log_files.clear(); +} + +int mutation_log::initialize(const char* dir) +{ + zauto_lock l(_lock); + + //create dir if necessary + if (!boost::filesystem::exists(dir) && !boost::filesystem::create_directory(dir)) + { + derror ("open mutation_log: create log path failed"); + return ERR_FILE_OPERATION_FAILED; + } + + _dir = std::string(dir); + + + _last_file_number = 0; + _log_files.clear(); + + boost::filesystem::directory_iterator endtr; + + for (boost::filesystem::directory_iterator it(dir); + it != endtr; + ++it) + { + std::string fullPath = it->path().string(); + log_file_ptr 
log = log_file::opend_read(fullPath.c_str()); + if (log == nullptr) + { + dwarn ("Skip file %s during log init", fullPath.c_str()); + continue; + } + + dassert (_log_files.find(log->index()) == _log_files.end(), ""); + _log_files[log->index()] = log; + } + + if (_log_files.size() > 0) + { + _last_file_number = _log_files.begin()->first - 1; + _global_start_offset = _log_files.begin()->second->start_offset(); + } + + for (auto it = _log_files.begin(); it != _log_files.end(); it++) + { + if (++_last_file_number != it->first) + { + derror ("log file missing with index %u", _last_file_number); + return ERR_OBJECT_NOT_FOUND; + } + + _global_end_offset = it->second->end_offset(); + } + + return ERR_SUCCESS; +} + +int mutation_log::create_new_log_file() +{ + //dassert (_lock.IsHeldByCurrentThread(), ""); + + if (_current_log_file != nullptr) + { + _last_log_file = _current_log_file; + dassert (_current_log_file->end_offset() == _global_end_offset, ""); + } + + log_file_ptr logFile = log_file::create_write(_dir.c_str(), _last_file_number + 1, _global_end_offset, _max_staleness_for_commit, _write_task_number); + if (logFile == nullptr) + { + derror ("cannot create log file with index %u", _last_file_number); + return ERR_FILE_OPERATION_FAILED; + } + + derror ("create new log file %s", logFile->path().c_str()); + + _last_file_number++; + dassert (_log_files.find(_last_file_number) == _log_files.end(), ""); + _log_files[_last_file_number] = logFile; + + dassert (logFile->end_offset() == logFile->start_offset(), ""); + dassert (_global_end_offset == logFile->end_offset(), ""); + + _current_log_file = logFile; + + create_new_pending_buffer(); + auto len = logFile->write_header(_pending_write, _init_prepared_decrees, + static_cast(_log_buffer_size_bytes)); + _global_end_offset += len; + dassert (_pending_write->total_size() == len + message_header::serialized_size(), ""); + + return ERR_SUCCESS; +} + +void mutation_log::create_new_pending_buffer() +{ + dassert (_pending_write == 
nullptr, ""); + dassert (_pending_write_callbacks == nullptr, ""); + dassert (_pending_write_timer == nullptr, ""); + + _pending_write = message::create_request(RPC_PREPARE, _log_pending_max_milliseconds); + _pending_write_callbacks.reset(new std::list); + + if (_batch_write) + { + _pending_write_timer = tasking::enqueue( + LPC_MUTATION_LOG_PENDING_TIMER, + this, + std::bind(&mutation_log::internal_pending_write_timer, this, _pending_write->header().id), + -1, + _log_pending_max_milliseconds + ); + } + + dassert (_pending_write->total_size() == message_header::serialized_size(), ""); + _global_end_offset += message_header::serialized_size(); +} + +void mutation_log::internal_pending_write_timer(uint64_t id) +{ + zauto_lock l(_lock); + dassert (nullptr != _pending_write, ""); + dassert (_pending_write->header().id == id, ""); + dassert (task::get_current_task() == _pending_write_timer, ""); + + _pending_write_timer = nullptr; + write_pending_mutations(); +} + +int mutation_log::write_pending_mutations(bool create_new_log_when_necessary) +{ + dassert (_pending_write != nullptr, ""); + dassert (_pending_write_timer == nullptr, ""); + dassert (_pending_write_callbacks != nullptr, ""); + + _pending_write->seal(true); + + auto bb = _pending_write->writer().get_buffer(); + uint64_t offset = end_offset() - bb.length(); + auto buf = bb.buffer(); + blob bb2(buf, bb.length()); + + task_ptr aio = _current_log_file->write_log_entry( + bb2, + LPC_AIO_IMMEDIATE_CALLBACK, + this, + std::bind( + &mutation_log::internal_write_callback, + std::placeholders::_1, + std::placeholders::_2, + _pending_write_callbacks, bb2), + offset, + -1 + ); + + if (aio == nullptr) + { + internal_write_callback(ERR_FILE_OPERATION_FAILED, 0, _pending_write_callbacks, bb2); + } + else + { + dassert (_global_end_offset == _current_log_file->end_offset(), ""); + } + + _pending_write = nullptr; + _pending_write_callbacks = nullptr; + _pending_write_timer = nullptr; + + if (aio == nullptr) + { + return 
ERR_FILE_OPERATION_FAILED; + } + + if (create_new_log_when_necessary && _current_log_file->end_offset() - _current_log_file->start_offset() >= _max_log_file_size_in_bytes) + { + int ret = create_new_log_file(); + if (ERR_SUCCESS != ret) + { + derror ("create new log file failed, err = %d", ret); + } + return ret; + } + return ERR_SUCCESS; +} + +void mutation_log::internal_write_callback(error_code err, uint32_t size, mutation_log::pending_callbacks_ptr callbacks, blob data) +{ + for (auto it = callbacks->begin(); it != callbacks->end(); it++) + { + (*it)->enqueue(err, size, nullptr); + } +} + +/* +TODO: when there is a log error, the server cannot contain any primary or secondary any more! +*/ +int mutation_log::replay(ReplayCallback callback) +{ + zauto_lock l(_lock); + + int64_t offset = start_offset(); + int err = ERR_SUCCESS; + for (auto it = _log_files.begin(); it != _log_files.end(); it++) + { + log_file_ptr log = it->second; + + if (log->start_offset() != offset) + { + derror("offset mismatch in log file offset and global offset %lld vs %lld", log->start_offset(), offset); + return ERR_FILE_OPERATION_FAILED; + } + + _last_log_file = log; + + ::dsn::blob bb; + err = log->read_next_log_entry(bb); + if (err != ERR_SUCCESS) + { + if (err == ERR_HANDLE_EOF) + { + err = ERR_SUCCESS; + continue; + } + + derror( + "read log header failed for %s, err = %x", log->path().c_str(), err); + break; + } + + + message_ptr msg(new message(bb)); + offset += message_header::serialized_size(); + + if (!msg->is_right_body()) + { + derror("data read crc check failed at offset %llu", offset); + return ERR_WRONG_CHECKSUM; + } + + offset += log->read_header(msg); + + while (true) + { + while (!msg->reader().is_eof()) + { + auto oldSz = msg->reader().get_remaining_size(); + mutation_ptr mu = mutation::read_from(msg); + dassert (nullptr != mu, ""); + mu->set_logged(); + + if (mu->data.header.log_offset != offset) + { + derror("offset mismatch in log entry and mutation %lld vs %lld", 
offset, mu->data.header.log_offset); + return ERR_FILE_OPERATION_FAILED; + } + + callback(mu); + + offset += oldSz - msg->reader().get_remaining_size(); + } + + err = log->read_next_log_entry(bb); + if (err != ERR_SUCCESS) + { + if (err == ERR_HANDLE_EOF) + { + err = ERR_SUCCESS; + break; + } + + derror( + "read log entry failed for %s, err = %x", log->path().c_str(), err); + break; + } + + msg = new message(bb); + offset += message_header::serialized_size(); + + if (!msg->is_right_body()) + { + derror("data read crc check failed at offset %llu", offset); + return ERR_WRONG_CHECKSUM; + } + } + + log->close(); + + // tail data corruption is checked by next file's offset checking + if (err != ERR_INVALID_DATA && err != ERR_SUCCESS) + break; + } + + if (err == ERR_INVALID_DATA && offset + _last_log_file->header().log_buffer_size_bytes >= end_offset()) + { + // remove bad data at tail, but still we may lose data so error code remains unchanged + _global_end_offset = offset; + } + else if (err == ERR_SUCCESS) + { + dassert (end_offset() == offset, ""); + } + + return err; +} + +int mutation_log::start_write_service(multi_partition_decrees& initMaxDecrees, int max_staleness_for_commit) +{ + zauto_lock l(_lock); + + _init_prepared_decrees = initMaxDecrees; + _max_staleness_for_commit = max_staleness_for_commit; + + dassert (_current_log_file == nullptr, ""); + return create_new_log_file(); +} + +void mutation_log::close() +{ + while (true) + { + zauto_lock l(_lock); + + if (nullptr != _pending_write_timer) + { + if (_pending_write_timer->cancel(false)) + { + _pending_write_timer = nullptr; + write_pending_mutations(false); + dassert (nullptr == _pending_write_timer, ""); + } + else + { + std::this_thread::sleep_for(std::chrono::milliseconds(0)); + continue; + } + } + + if (nullptr != _current_log_file) + { + _current_log_file->close(); + _current_log_file = nullptr; + } + break; + } +} + +task_ptr mutation_log::append(mutation_ptr& mu, + task_code callback_code, + 
servicelet* callback_host, + aio_handler callback, + int hash) +{ + zauto_lock l(_lock); + + if (nullptr == _current_log_file) return nullptr; + + auto it = _init_prepared_decrees.find(mu->data.header.gpid); + if (it != _init_prepared_decrees.end()) + { + if (it->second < mu->data.header.decree) + { + it->second = mu->data.header.decree; + } + } + else + { + _init_prepared_decrees[mu->data.header.gpid] = mu->data.header.decree; + } + + if (_pending_write == nullptr) + { + create_new_pending_buffer(); + } + + auto oldSz = _pending_write->total_size(); + mu->data.header.log_offset = end_offset(); + mu->write_to(_pending_write); + _global_end_offset += _pending_write->total_size() - oldSz; + + aio_task_ptr tsk(new file::internal_use_only::service_aio_task(callback_code, callback_host, callback, hash)); + + _pending_write_callbacks->push_back(tsk); + + /*if (dsn::service::spec().traceOptions.PathTracing) + { + ddebug( + "BATCHTHROUGH mutation write with io callback DstTaskId = %016llx", task->TaskId() + ); + }*/ + + // printf ("append: %llu, offset = %llu, global = %llu, pendingSize = %u\n", + // mu->data.header.decree, mu->data.header.log_offset, _global_end_offset, _pending_write->total_size()); + + if (!_batch_write) + { + write_pending_mutations(); + } + else if ((uint32_t)_pending_write->total_size() >= _log_buffer_size_bytes) + { + if (_pending_write_timer->cancel(false)) + { + _pending_write_timer = nullptr; + write_pending_mutations(); + } + } + + return tsk; +} + +void mutation_log::on_partition_removed(global_partition_id gpid) +{ + zauto_lock l(_lock); + _init_prepared_decrees.erase(gpid); +} + +int mutation_log::garbage_collection(multi_partition_decrees& durable_decrees) +{ + std::map files; + std::map::reverse_iterator itr; + + { + zauto_lock l(_lock); + files = _log_files; + if (nullptr != _current_log_file) files.erase(_current_log_file->index()); + } + + for (itr = files.rbegin(); itr != files.rend(); itr++) + { + log_file_ptr log = itr->second; + + 
bool deleteOlderFiles = true; + for (auto it2 = durable_decrees.begin(); it2 != durable_decrees.end(); it2++) + { + global_partition_id gpid = it2->first; + decree lastDurableDecree = it2->second; + + auto it3 = log->init_prepare_decrees().find(gpid); + if (it3 == log->init_prepare_decrees().end()) + { + // new partition, ok to delete older logs + } + else + { + decree initPrepareDecree = it3->second; + decree maxPrepareDecreeBeforeThis = initPrepareDecree; + + // when all possible decress are covered by durable decress + if (lastDurableDecree >= maxPrepareDecreeBeforeThis) + { + // ok to delete older logs + } + else + { + deleteOlderFiles = false; + break; + } + } + } + + if (deleteOlderFiles) + { + break; + } + } + + if (itr != files.rend()) itr++; + + int count = 0; + for (; itr != files.rend(); itr++) + { + itr->second->close(); + + ddebug( + "remove log segment %s", itr->second->path().c_str()); + + std::string newName = itr->second->path() + ".removed"; + + boost::filesystem::rename(itr->second->path().c_str(), newName.c_str()); + + count++; + + { + zauto_lock l(_lock); + _log_files.erase(itr->first); + } + + /* + if (!::MoveFileExA(itr->second->path().c_str(), nullptr, MOVEFILE_WRITE_THROUGH)) + { + int err = GetLastError(); + LogR(log_level_Error, "mutation_log::truncate error. 
error: %d", err); + return err; + } + */ + } + + return count; +} + + +std::map& mutation_log::get_logfiles_for_test() +{ + return _log_files; +} + + +//------------------- log_file -------------------------- +/*static */log_file_ptr log_file::opend_read(const char* path) +{ + std::string pt = std::string(path); + auto pos = pt.find_last_of('/'); + if (pos == std::string::npos) + { + dwarn( "Invalid log path %s", path); + return nullptr; + } + + // log.index.startOffset + std::string name = pt.substr(pos + 1); + if (name.length() < strlen("log.") + || name.substr(0, strlen("log.")) != std::string("log.") + || (name.length() > strlen(".removed") && name.substr(name.length() - strlen(".removed")) == std::string(".removed")) + ) + { + dwarn( "Invalid log path %s", path); + return nullptr; + } + + pos = name.find_first_of('.'); + auto pos2 = name.find_first_of('.', pos + 1); + if (pos2 == std::string::npos) + { + dwarn( "Invalid log path %s", path); + return nullptr; + } + + handle_t hFile = (handle_t)::open(path, O_RDONLY, 0); + + if (hFile == 0) + { + dwarn("open log %s failed", path); + return nullptr; + } + + + int index = atoi(name.substr(pos + 1, pos2 - pos - 1).c_str()); + int64_t startOffset = atol(name.substr(pos2 + 1).c_str()); + + return new log_file(path, hFile, index, startOffset, 0, true); +} + +/*static*/ log_file_ptr log_file::create_write(const char* dir, int index, int64_t startOffset, int max_staleness_for_commit, int write_task_max_count) +{ + char path[512]; + sprintf (path, "%s/log.%u.%lld", dir, index, static_cast(startOffset)); + + handle_t hFile = dsn::service::file::open(path, O_RDWR | O_CREAT, 0666); + if (hFile == 0) + { + dwarn("create log %s failed", path); + return nullptr; + } + + return new log_file(path, hFile, index, startOffset, max_staleness_for_commit, false, write_task_max_count); +} + +log_file::log_file(const char* path, handle_t handle, int index, int64_t startOffset, int max_staleness_for_commit, bool isRead, int 
write_task_max_count)
+{
+    _start_offset = startOffset;
+    _end_offset = startOffset;
+    _handle = handle;
+    _is_read = isRead;
+    _path = path;
+    _index = index;
+    memset(&_header, 0, sizeof(_header));
+    _header.max_staleness_for_commit = max_staleness_for_commit;
+    _write_task_itr = 0;
+    _write_tasks.resize(write_task_max_count);
+
+    if (isRead)
+    {
+        boost::filesystem::path cp(_path);
+        _end_offset += boost::filesystem::file_size(cp);
+    }
+}
+
+void log_file::close()
+{
+    for (size_t itr = 0; itr < _write_tasks.size(); ++itr)
+    {
+        if (_write_tasks.at(itr) != nullptr)
+        {
+            _write_tasks.at(itr)->wait();
+            _write_tasks.at(itr) = nullptr;
+        }
+    }
+
+    if (0 != _handle)
+    {
+        if (_is_read)
+            ::close((int)(_handle));
+        else
+            dsn::service::file::close(_handle);
+
+        _handle = 0;
+    }
+}
+
+//
+// Read the next entry (message header followed by its body) from a log file
+// opened for reading. On success bb holds header and body contiguously.
+//
+// Returns:
+//   ERR_SUCCESS               - an entry was read into bb
+//   ERR_HANDLE_EOF            - the file position is exactly at end of file
+//   ERR_INVALID_DATA          - the header failed validation
+//   ERR_FILE_OPERATION_FAILED - a read failed or returned fewer bytes than requested
+//
+int log_file::read_next_log_entry(__out_param ::dsn::blob& bb)
+{
+    dassert (_is_read, "");
+
+    // buffers come from new[], so they need the array deleter
+    std::shared_ptr<char> hdrBuffer(new char[message_header::serialized_size()], std::default_delete<char[]>());
+
+    auto hdrRead = ::read(
+        (int)(_handle),
+        hdrBuffer.get(),
+        message_header::serialized_size()
+        );
+
+    if (hdrRead == 0)
+    {
+        // nothing left in the file: this is the normal way a replay of this file ends
+        return ERR_HANDLE_EOF;
+    }
+
+    if (message_header::serialized_size() != hdrRead)
+    {
+        return ERR_FILE_OPERATION_FAILED;
+    }
+
+    message_header hdr;
+    ::dsn::blob bb2(hdrBuffer, message_header::serialized_size());
+    ::dsn::binary_reader reader(bb2);
+    hdr.unmarshall(reader);
+
+    if (!hdr.is_right_header((char*)hdrBuffer.get()))
+    {
+        derror("invalid data header");
+        return ERR_INVALID_DATA;
+    }
+
+    // the output blob carries the header followed by the body
+    std::shared_ptr<char> data(new char[message_header::serialized_size() + hdr.body_length], std::default_delete<char[]>());
+    memcpy(data.get(), hdrBuffer.get(), message_header::serialized_size());
+    bb.assign(data, 0, message_header::serialized_size() + hdr.body_length);
+
+    if (hdr.body_length != ::read(
+            (int)(_handle),
+            (void*)((char*)bb.data() + message_header::serialized_size()),
+            hdr.body_length
+            ))
+    {
+        return ERR_FILE_OPERATION_FAILED;
+    }
+
+    return ERR_SUCCESS;
+}
+
+aio_task_ptr log_file::write_log_entry(
+                blob& bb,
+                task_code evt,  // to indicate which thread pool to execute the callback
+
servicelet* callback_host, + aio_handler callback, + int64_t offset, + int hash + ) +{ + dassert (!_is_read, ""); + dassert (offset == end_offset(), ""); + + auto task = file::write( + _handle, + bb.data(), + bb.length(), + offset - start_offset(), + evt, + callback_host, + callback, + hash + ); + + _end_offset = offset + bb.length(); + + //printf ("WriteBB: size = %u, startoffset = %llu, endOffset = %llu\n", bb.length(), offset, _end_offset); + + // !!! dangerous, we are in the middle of a local lock + // we already have flow control on maximum on-the-fly prepare requests, so flow control here can be disabled + /*if (_write_tasks.at(_write_task_itr) != nullptr) + { + _write_tasks.at(_write_task_itr)->wait(); + } + + _write_tasks.at(_write_task_itr) = task; + _write_task_itr = (_write_task_itr < static_cast_write_tasks.size() - 1) ? _write_task_itr + 1 : 0;*/ + + return task; +} + +int log_file::read_header(message_ptr& reader) +{ + + reader->reader().read_pod(_header); + + int count; + reader->reader().read(count); + for (int i = 0; i < count; i++) + { + global_partition_id gpid; + decree decree; + + reader->reader().read_pod(gpid); + reader->reader().read(decree); + + _init_prepared_decrees[gpid] = decree; + } + + return static_cast( + sizeof(_header) + sizeof(count) + + (sizeof(global_partition_id) + sizeof(decree))*count + ); +} + +int log_file::write_header(message_ptr& writer, multi_partition_decrees& initMaxDecrees, int bufferSizeBytes) +{ + _init_prepared_decrees = initMaxDecrees; + + _header.magic = 0xdeadbeef; + _header.version = 0x1; + _header.start_global_offset = start_offset(); + _header.log_buffer_size_bytes = bufferSizeBytes; + // staleness set in ctor + + writer->writer().write_pod(_header); + + int count = static_cast(_init_prepared_decrees.size()); + writer->writer().write(count); + for (auto it = _init_prepared_decrees.begin(); it != _init_prepared_decrees.end(); it++) + { + writer->writer().write_pod(it->first); + 
writer->writer().write(it->second); + } + + return static_cast( + sizeof(_header)+sizeof(count) + +(sizeof(global_partition_id)+sizeof(decree))*count + ); +} + +}} // end namespace diff --git a/src/apps/replication/lib/mutation_log.h b/src/apps/replication/lib/mutation_log.h new file mode 100644 index 0000000000..5765187159 --- /dev/null +++ b/src/apps/replication/lib/mutation_log.h @@ -0,0 +1,209 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */
+#pragma once
+
+#include "replication_common.h"
+#include "mutation.h"
+
+namespace dsn { namespace replication {
+
+#define INVALID_FILENUMBER (0)
+// default value of mutation_log's max_log_file_mb constructor argument
+#define MAX_LOG_FILESIZE (32)
+
+class log_file;
+typedef boost::intrusive_ptr log_file_ptr;
+
+// per-partition decree, keyed by global partition id
+typedef std::map multi_partition_decrees;
+
+//
+// A log of mutations stored as a sequence of numbered log_file segments in one
+// directory. Offsets handed out by this class are global, i.e. they run across
+// all segments.
+//
+class mutation_log : public virtual servicelet
+{
+public:
+    // callback invoked by replay() with each mutation read from the log
+    typedef std::function ReplayCallback;
+
+public:
+    //
+    // ctors
+    //
+    mutation_log(
+        uint32_t log_buffer_size_mb,
+        uint32_t log_pending_max_ms,
+        uint32_t max_log_file_mb = (uint64_t) MAX_LOG_FILESIZE,
+        bool batch_write = true,
+        int write_task_max_count = 2
+        );
+    virtual ~mutation_log();
+
+    //
+    // initialization
+    //
+    // initialize: scan dir for existing log files (creating dir if needed)
+    // replay: read all known log files in order, invoking callback per mutation
+    // reset: close and forget all log files, resetting offsets
+    // start_write_service: create the first writable log file
+    //
+    int initialize(const char* dir);
+    int replay(ReplayCallback callback);
+    void reset();
+    int start_write_service(multi_partition_decrees& initMaxDecrees, int max_staleness_for_commit);
+    void close();
+
+    //
+    // log mutation
+    //
+    // return value: nullptr for error
+    task_ptr append(mutation_ptr& mu,
+            task_code callback_code,
+            servicelet* callback_host,
+            aio_handler callback,
+            int hash = 0);
+
+    // Remove entry from _init_prepared_decrees when a partition is removed.
+    void on_partition_removed(global_partition_id gpid);
+
+    //
+    //  garbage collection logs that are already covered by durable state on disk, return deleted log segment count
+    //
+    int garbage_collection(multi_partition_decrees& durable_decrees);
+
+    //
+    //    other inquiry routines
+    const std::string& dir() const {return _dir;}
+    int64_t end_offset() const { return _global_end_offset; }
+    int64_t start_offset() const { return _global_start_offset; }
+
+    std::map& get_logfiles_for_test();
+
+private:
+    //
+    //  internal helpers
+    //
+    typedef std::shared_ptr> pending_callbacks_ptr;
+
+    int create_new_log_file();
+    void create_new_pending_buffer();
+    void internal_pending_write_timer(uint64_t id);
+    static void internal_write_callback(error_code err, uint32_t size, pending_callbacks_ptr callbacks, blob data);
+    int write_pending_mutations(bool create_new_log_when_necessary = true);
+
+private:
+
+    zlock                     _lock;
+    int64_t                   _max_log_file_size_in_bytes;
+    std::string               _dir;
+    bool                      _batch_write;
+
+    // write & read
+    int                         _last_file_number;
+    std::map _log_files;
+    log_file_ptr                _last_log_file;
+    log_file_ptr                _current_log_file;
+    int64_t                     _global_start_offset;
+    int64_t                     _global_end_offset;
+
+    // gc
+    multi_partition_decrees     _init_prepared_decrees;
+    int                         _max_staleness_for_commit;
+
+    // buffering
+    uint32_t                    _log_buffer_size_bytes;
+    uint32_t                    _log_pending_max_milliseconds;
+
+    // mutations appended but not yet handed to the log file, the callbacks
+    // to fire once they are written, and the timer that flushes them
+    message_ptr                 _pending_write;
+    pending_callbacks_ptr       _pending_write_callbacks;
+    task_ptr                    _pending_write_timer;
+
+    int                         _write_task_number;
+};
+
+//
+// One segment of the mutation log, opened either for reading (opend_read) or
+// for writing (create_write). Instances are reference counted.
+//
+class log_file : public ref_object
+{
+public:
+    // header stored at the beginning of each log file
+    struct log_file_header
+    {
+        int32_t  magic;
+        int32_t  version;
+        int32_t  header_size;
+        int32_t  max_staleness_for_commit;
+        int32_t  log_buffer_size_bytes;
+        int64_t  start_global_offset;
+    };
+
+public:
+    ~log_file() { close(); }
+
+    //
+    // file operations
+    //
+    static log_file_ptr opend_read(const char* path);
+    static log_file_ptr create_write(const char* dir, int index, int64_t startOffset, int max_staleness_for_commit, int write_task_max_count = 2);
+    void close();
+
+    //
+    // read routines
+    //
+    int read_next_log_entry(__out_param ::dsn::blob& bb);
+
+    //
+    // write routines
+    //
+    // return value: nullptr for error or immediate success (using ::GetLastError to get code), otherwise it is pending
+    // NOTE(review): mutation_log::write_pending_mutations treats a nullptr result as a failure - confirm which is intended
+    aio_task_ptr write_log_entry(
+                    blob& bb,
+                    task_code evt,  // to indicate which thread pool to execute the callback
+                    servicelet* callback_host,
+                    aio_handler callback,
+                    int64_t offset,
+                    int hash
+                    );
+
+    // others
+    int64_t end_offset() const { return _end_offset; }
+    int64_t start_offset() const  { return _start_offset; }
+    int   index() const { return _index; }
+    const std::string& path() const { return _path; }
+    const multi_partition_decrees& init_prepare_decrees() { return _init_prepared_decrees; }
+    const log_file_header& header() const { return _header;}
+
+    // both return the number of bytes the header occupies in the message
+    int read_header(message_ptr& msg);
+    int write_header(message_ptr& msg, multi_partition_decrees& initMaxDecrees, int bufferSizeBytes);
+
+private:
+    log_file(const char* path, handle_t handle, int index, int64_t startOffset, int max_staleness_for_commit, bool isRead, int write_task_max_count = 2);
+
+protected:
+    // global offsets covered by this file: [_start_offset, _end_offset)
+    int64_t       _start_offset;
+    int64_t       _end_offset;
+    handle_t      _handle;      // 0 means closed
+    bool          _is_read;
+    std::string   _path;
+    int           _index;
+    std::vector  _write_tasks;
+    int                        _write_task_itr;
+
+    // for gc
+    multi_partition_decrees _init_prepared_decrees;
+    log_file_header         _header;
+};
+
+DEFINE_REF_OBJECT(log_file)
+
+}} // namespace
diff --git a/src/apps/replication/lib/prepare_list.cpp b/src/apps/replication/lib/prepare_list.cpp
new file mode 100644
index 0000000000..dcbcfbb3f9
--- /dev/null
+++ b/src/apps/replication/lib/prepare_list.cpp
@@ -0,0 +1,177 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015 Microsoft Corporation
+ *
+ * -=- Robust Distributed System Nucleus (rDSN) -=-
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated 
documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "prepare_list.h" +#include "mutation.h" + +#define __TITLE__ "prepare_list" + +namespace dsn { namespace replication { + +prepare_list::prepare_list( + decree init_decree, int max_count, + mutation_committer committer) + : mutation_cache(init_decree, max_count) +{ + _committer = committer; + _lastCommittedDecree = 0; +} + +void prepare_list::sanity_check() +{ + dassert ( + last_committed_decree() <= min_decree(), "" + ); +} + +void prepare_list::reset(decree init_decree) +{ + _lastCommittedDecree = init_decree; + mutation_cache::reset(init_decree, true); +} + +void prepare_list::truncate(decree init_decree) +{ + while (min_decree() <= init_decree && count() > 0) + { + pop_min(); + } + _lastCommittedDecree = init_decree; +} + +error_code prepare_list::prepare(mutation_ptr& mu, partition_status status) +{ + dassert (mu->data.header.decree > last_committed_decree(), ""); + + error_code err; + switch (status) + { + case PS_PRIMARY: + return mutation_cache::put(mu); + + case PS_SECONDARY: + 
commit(mu->data.header.last_committed_decree, true); + err = mutation_cache::put(mu); + dassert (err == ERR_SUCCESS, ""); + return err; + + case PS_POTENTIAL_SECONDARY: + while (true) + { + err = mutation_cache::put(mu); + if (err == ERR_CAPACITY_EXCEEDED) + { + dassert (min_decree() == last_committed_decree() + 1, ""); + dassert (mu->data.header.last_committed_decree > last_committed_decree(), ""); + commit (last_committed_decree() + 1, true); + } + else + break; + } + dassert (err == ERR_SUCCESS, ""); + return err; + + case PS_INACTIVE: // only possible during init + err = ERR_SUCCESS; + if (mu->data.header.last_committed_decree > max_decree()) + { + reset(mu->data.header.last_committed_decree); + } + else if (mu->data.header.last_committed_decree > _lastCommittedDecree) + { + for (decree d = last_committed_decree() + 1; d <= mu->data.header.last_committed_decree; d++) + { + _lastCommittedDecree++; + if (count() == 0) + break; + + if (d == min_decree()) + { + mutation_ptr mu2 = get_mutation_by_decree(d); + pop_min(); + if (mu2 != nullptr) _committer(mu2); + } + } + + dassert (_lastCommittedDecree == mu->data.header.last_committed_decree, ""); + sanity_check(); + } + + err = mutation_cache::put(mu); + dassert (err == ERR_SUCCESS, ""); + return err; + + default: + dassert (false, ""); + return 0; + } +} + +// +// ordered commit +// +bool prepare_list::commit(decree d, bool force) +{ + if (d <= last_committed_decree()) + return false; + + if (!force) + { + if (d != last_committed_decree() + 1) + return false; + + mutation_ptr mu = get_mutation_by_decree(last_committed_decree() + 1); + + while (mu != nullptr && mu->is_ready_for_commit()) + { + _lastCommittedDecree++; + _committer(mu); + + dassert (mutation_cache::min_decree() == _lastCommittedDecree, ""); + pop_min(); + + mu = mutation_cache::get_mutation_by_decree(_lastCommittedDecree + 1); + } + } + else + { + for (decree d0 = last_committed_decree() + 1; d0 <= d; d0++) + { + mutation_ptr mu = 
get_mutation_by_decree(d0); + dassert (mu != nullptr && mu->is_prepared(), ""); + + _lastCommittedDecree++; + _committer(mu); + + dassert (mutation_cache::min_decree() == _lastCommittedDecree, ""); + pop_min(); + } + } + + sanity_check(); + return true; +} + +}} // namespace end diff --git a/src/apps/replication/lib/prepare_list.h b/src/apps/replication/lib/prepare_list.h new file mode 100644 index 0000000000..f38f82486a --- /dev/null +++ b/src/apps/replication/lib/prepare_list.h @@ -0,0 +1,62 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */
+#pragma once
+
+#include "mutation_cache.h"
+
+namespace dsn { namespace replication {
+
+
+//
+// A mutation_cache that additionally tracks the last committed decree and
+// hands every mutation to a committer callback when it is committed.
+//
+class prepare_list : public mutation_cache
+{
+public:
+    // invoked with each mutation at the moment it is committed
+    typedef std::function mutation_committer;
+
+public:
+    prepare_list(
+        decree init_decree, int max_count,
+        mutation_committer committer);
+
+    decree last_committed_decree() const { return _lastCommittedDecree; }
+    // set the last committed decree to init_decree and clear the cache
+    void   reset(decree init_decree);
+    // drop cached mutations up to and including init_decree (without
+    // invoking the committer) and set the last committed decree to it
+    void   truncate(decree init_decree);
+
+    //
+    // for two-phase commit
+    //
+    // prepare: store mu in the cache; what else happens (committing earlier
+    //          decrees, resetting the list) depends on the partition status
+    // commit:  returns false when nothing can be committed for the given decree;
+    //          with force, every decree up to it is committed and each must
+    //          already be prepared; without force, only the decree right after
+    //          the last committed one is accepted, and consecutive mutations
+    //          are committed for as long as they are ready
+    //
+    error_code prepare(mutation_ptr& mu, partition_status status); // unordered prepare
+    bool       commit(decree decree, bool force); // ordered commit
+
+private:
+    void sanity_check();
+
+private:
+    decree                   _lastCommittedDecree;
+    mutation_committer _committer;
+};
+
+}} // namespace
+
+
diff --git a/src/apps/replication/lib/replica.cpp b/src/apps/replication/lib/replica.cpp
new file mode 100644
index 0000000000..6be42b2fc3
--- /dev/null
+++ b/src/apps/replication/lib/replica.cpp
@@ -0,0 +1,238 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015 Microsoft Corporation
+ *
+ * -=- Robust Distributed System Nucleus (rDSN) -=-
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "replica.h" +#include "mutation.h" +#include "mutation_log.h" +#include "replica_stub.h" + +#define __TITLE__ "replica" + +namespace dsn { namespace replication { + +// for replica::load(..) only +replica::replica(replica_stub* stub, replication_options& options) +: serverlet("replica") +{ + dassert (stub, ""); + _stub = stub; + _app = nullptr; + + _options = options; + + init_state(); +} + +// for create new replica only used in replica_stub::on_config_proposal +replica::replica(replica_stub* stub, global_partition_id gpid, replication_options& options) +: serverlet("replica") +{ + dassert (stub, ""); + _stub = stub; + _app = nullptr; + _options = options; + + init_state(); + _config.gpid = gpid; +} + +void replica::init_state() +{ + _inactive_is_transient = false; + _log = nullptr; + _prepare_list = new prepare_list( + 0, + _options.staleness_for_start_prepare_for_potential_secondary, + std::bind( + &replica::execute_mutation, + this, + std::placeholders::_1 + ) + ); + + _config.ballot = 0; + _config.gpid.pidx = 0; + _config.gpid.app_id = 0; + _config.status = PS_INACTIVE; + _primary_states.membership.ballot = 0; + _last_config_change_time_ms = now_ms(); +} + +replica::~replica(void) +{ + close(); + + if (nullptr != _prepare_list) + { + delete _prepare_list; + _prepare_list = nullptr; + } + + if (nullptr != _app) + { + delete _app; + _app = nullptr; + } +} + +void replica::on_client_read(const read_request_header& meta, message_ptr& request) +{ + if (status() == PS_INACTIVE || status() == PS_POTENTIAL_SECONDARY) + { + response_client_message(request, ERR_INVALID_STATE); + return; + } + + dassert (_app != nullptr, ""); + _app->dispatch_rpc_call(meta.code, request, true); +} + 
+void replica::response_client_message(message_ptr& request, error_code error, decree d/* = invalid_decree*/) +{ + message_ptr resp = request->create_response(); + int err = error.get(); + resp->writer().write(err); + + if (error != ERR_SUCCESS) + { + dwarn("handle request with rpc_id = %016llx failed, err = %s", + request->header().rpc_id, error.to_string()); + } + + rpc::reply(resp); +} + +void replica::execute_mutation(mutation_ptr& mu) +{ + dassert (nullptr != _app, ""); + + int err = ERR_SUCCESS; + switch (status()) + { + case PS_INACTIVE: + if (_app->last_committed_decree() + 1 == mu->data.header.decree) + err = _app->write_internal(mu, false); + break; + case PS_PRIMARY: + case PS_SECONDARY: + { + dassert (_app->last_committed_decree() + 1 == mu->data.header.decree, ""); + bool ack_client = (status() == PS_PRIMARY); + if (ack_client) + { + if (mu->client_request == nullptr) + ack_client = false; + else if (mu->client_request->header().from_address.ip == 0) + ack_client = false; + } + err = _app->write_internal(mu, ack_client); + + //PerformanceCounters::Increment(PerfCounters_LocalCommitQps, nullptr); + } + break; + case PS_POTENTIAL_SECONDARY: + if (LearningSucceeded == _potential_secondary_states.LearningState) + { + if (mu->data.header.decree == _app->last_committed_decree() + 1) + { + err = _app->write_internal(mu, false); + } + else + { + dassert (mu->data.header.decree <= _app->last_committed_decree(), ""); + } + } + else + { + // drop mutations as learning will catch up + ddebug("%s: mutation %s skipped coz learing buffer overflow", name(), mu->name()); + } + break; + case PS_ERROR: + break; + } + + ddebug("TwoPhaseCommit, %s: mutation %s committed, err = %x", name(), mu->name(), err); + + if (err != ERR_SUCCESS) + { + handle_local_failure(err); + } +} + +mutation_ptr replica::new_mutation(decree decree) +{ + mutation_ptr mu(new mutation()); + mu->data.header.gpid = get_gpid(); + mu->data.header.ballot = get_ballot(); + mu->data.header.decree = 
decree; + mu->data.header.log_offset = invalid_offset; + return mu; +} + +bool replica::group_configuration(__out_param partition_configuration& config) const +{ + if (PS_PRIMARY != status()) + return false; + + config = _primary_states.membership; + return true; +} + +decree replica::last_durable_decree() const { return _app->last_durable_decree(); } + +decree replica::last_prepared_decree() const +{ + ballot lastBallot = 0; + decree start = last_committed_decree(); + while (true) + { + auto mu = _prepare_list->get_mutation_by_decree(start + 1); + if (mu == nullptr || mu->data.header.ballot < lastBallot || !mu->is_prepared()) + break; + start++; + lastBallot = mu->data.header.ballot; + } + return start; +} + +void replica::close() +{ + if (status() != PS_INACTIVE && status() != PS_ERROR) + { + update_local_configuration_with_no_ballot_change(PS_INACTIVE); + } + + cleanup_preparing_mutations(true); + _primary_states.cleanup(); + _potential_secondary_states.cleanup(true); + + if (_app != nullptr) + { + _app->close(false); + } +} + +}} // namespace diff --git a/src/apps/replication/lib/replica.h b/src/apps/replication/lib/replica.h new file mode 100644 index 0000000000..2b9595e4a8 --- /dev/null +++ b/src/apps/replication/lib/replica.h @@ -0,0 +1,202 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the 
Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#pragma once
+
+//
+// a replica is a replication partition of a service,
+// which handles all replication related issues
+// and redirects the app messages to the replication_app_base
+// which is bound to this replication partition
+//
+
+// NOTE(review): angle-bracketed text seems to have been stripped from this
+// listing - the first include below has no header name, and serverlet,
+// std::pair and std::shared_ptr further down have no template arguments.
+// Restore them from the upstream file before building.
+# include
+# include "replication_common.h"
+# include "mutation.h"
+# include "prepare_list.h"
+# include "replica_context.h"
+
+namespace dsn { namespace replication {
+
+class replication_app_base;
+class mutation_log;
+class replica_stub;
+
+using namespace ::dsn::service;
+
+class replica : public serverlet, public ref_object
+{
+public:
+    // closes the replica and deletes _prepare_list and _app
+    ~replica(void);
+
+    //
+    // routines for replica stub
+    //
+    static replica* load(replica_stub* stub, const char* dir, replication_options& options, bool renameDirOnFailure);
+    static replica* newr(replica_stub* stub, const char* app_type, global_partition_id gpid, replication_options& options);
+    void replay_mutation(mutation_ptr& mu);
+    void reset_prepare_list_after_replay();
+    bool update_local_configuration_with_no_ballot_change(partition_status status);
+    void set_inactive_state_transient(bool t);
+    void close();
+
+    //
+    // requests from clients
+    //
+    void on_client_write(int code, message_ptr& request);
+    void on_client_read(const read_request_header& meta, message_ptr& request);
+
+    //
+    // messages and tools from/for meta server
+    //
+    void on_config_proposal(configuration_update_request& proposal);
+    void on_config_sync(const partition_configuration& config);
+
+    //
+    // messages from peers (primary or secondary)
+    //
+    void on_prepare(message_ptr& request);
+    void on_learn(const learn_request& request, __out_param learn_response& response);
+    void on_learn_completion_notification(const group_check_response& report);
+    void on_add_learner(const group_check_request& request);
+    void on_remove(const replica_configuration& request);
+    void on_group_check(const group_check_request& request, __out_param group_check_response& response);
+
+    //
+    // messages from liveness monitor
+    //
+    void on_meta_server_disconnected();
+
+    //
+    // routine for testing purpose only
+    //
+    void send_group_check_once_for_test(int delay_milliseconds);
+
+    //
+    // local information query
+    //
+    ballot get_ballot() const {return _config.ballot; }
+    partition_status status() const { return _config.status; }
+    global_partition_id get_gpid() const { return _config.gpid; }
+    replication_app_base* get_app() { return _app; }
+    // highest decree currently held in the prepare list
+    decree max_prepared_decree() const { return _prepare_list->max_decree(); }
+    decree last_committed_decree() const { return _prepare_list->last_committed_decree(); }
+    decree last_prepared_decree() const;
+    decree last_durable_decree() const;
+    const std::string& dir() const { return _dir; }
+    // fills config and returns true only when this replica is the primary
+    bool group_configuration(__out_param partition_configuration& config) const;
+    uint64_t last_config_change_time_milliseconds() const { return _last_config_change_time_ms; }
+    const char* name() const { return _name; }
+
+private:
+    // common helpers
+    void init_state();
+    void response_client_message(message_ptr& request, error_code error, decree decree = -1);
+    void execute_mutation(mutation_ptr& mu);
+    mutation_ptr new_mutation(decree decree);
+
+    // initialization
+    int init_app_and_prepare_list(const char* app_type, bool create_new);
+    int initialize_on_load(const char* dir, bool renameDirOnFailure);
+    int initialize_on_new(const char* app_type, global_partition_id gpid);
+    replica(replica_stub* stub, replication_options& options); // for replica::load(..) only
+    replica(replica_stub* stub, global_partition_id gpid, replication_options& options); // for replica::newr(...) only
+
+    /////////////////////////////////////////////////////////////////
+    // 2pc
+    void init_prepare(mutation_ptr& mu);
+    void send_prepare_message(const end_point& addr, partition_status status, mutation_ptr& mu, int timeout_milliseconds);
+    void on_append_log_completed(mutation_ptr& mu, uint32_t err, uint32_t size);
+    // pr carries the mutation and the status the prepare was sent for
+    // (see send_prepare_message); its template arguments are missing here
+    void on_prepare_reply(std::pair pr, int err, message_ptr& request, message_ptr& reply);
+    void do_possible_commit_on_primary(mutation_ptr& mu);
+    void ack_prepare_message(int err, mutation_ptr& mu);
+    void cleanup_preparing_mutations(bool is_primary);
+
+    /////////////////////////////////////////////////////////////////
+    // learning
+    void init_learn(uint64_t signature);
+    void on_learn_reply(error_code err, std::shared_ptr& req, std::shared_ptr& resp);
+    void on_learn_remote_state(std::shared_ptr resp);
+    void on_learn_remote_state_completed(int err);
+    void handle_learning_error(int err);
+    void handle_learning_succeeded_on_primary(const end_point& node, uint64_t learnSignature);
+    void notify_learn_completion();
+
+    /////////////////////////////////////////////////////////////////
+    // failure handling
+    void handle_local_failure(int error);
+    void handle_remote_failure(partition_status status, const end_point& node, int error);
+
+    /////////////////////////////////////////////////////////////////
+    // reconfiguration
+    void assign_primary(configuration_update_request& proposal);
+    void add_potential_secondary(configuration_update_request& proposal);
+    void upgrade_to_secondary_on_primary(const end_point& node);
+    void downgrade_to_secondary_on_primary(configuration_update_request& proposal);
+    void downgrade_to_inactive_on_primary(configuration_update_request& proposal);
+    void remove(configuration_update_request& proposal);
+    void update_configuration_on_meta_server(config_type type, const end_point& node, partition_configuration& newConfig);
+    void on_update_configuration_on_meta_server_reply(error_code err, message_ptr& request, message_ptr& response, std::shared_ptr req);
+    // return if is_closing
+    bool update_configuration(const partition_configuration& config);
+    bool update_local_configuration(const replica_configuration& config, bool same_ballot = false);
+    void replay_prepare_list();
+    bool is_same_ballot_status_change_allowed(partition_status olds, partition_status news);
+
+    /////////////////////////////////////////////////////////////////
+    // group check
+    void init_group_check();
+    void broadcast_group_check();
+    void on_group_check_reply(error_code err, std::shared_ptr& req, std::shared_ptr& resp);
+
+private:
+    // replica configuration, updated by update_local_configuration ONLY
+    replica_configuration _config;
+    uint64_t _last_config_change_time_ms;
+
+    // prepare list (created in init_state, deleted in the destructor)
+    prepare_list* _prepare_list;
+
+    // private log (if enabled)
+    mutation_log* _log;
+
+    // application (deleted in the destructor)
+    replication_app_base* _app;
+
+    // constants
+    replica_stub* _stub;
+    std::string _dir;
+    char _name[256]; // app.index @ host:port
+    replication_options _options;
+
+    // replica status specific states
+    primary_context _primary_states;
+    potential_secondary_context _potential_secondary_states;
+    bool _inactive_is_transient; // upgrade to P/S is allowed only iff true
+};
+
+DEFINE_REF_OBJECT(replica)
+
+}} // namespace
diff --git a/src/apps/replication/lib/replica_2pc.cpp b/src/apps/replication/lib/replica_2pc.cpp
new file mode 100644
index 0000000000..36e05ddaaa
--- /dev/null
+++ b/src/apps/replication/lib/replica_2pc.cpp
@@ -0,0 +1,462 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015 Microsoft Corporation
+ *
+ * -=- Robust Distributed System Nucleus (rDSN) -=-
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction,
including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "replica.h" +#include "mutation.h" +#include "mutation_log.h" +#include "replica_stub.h" + +#define __TITLE__ "TwoPhaseCommit" + +namespace dsn { namespace replication { + + +void replica::on_client_write(int code, message_ptr& request) +{ + check_hashed_access(); + + if (PS_PRIMARY != status()) + { + response_client_message(request, ERR_INVALID_STATE); + return; + } + + mutation_ptr mu = new_mutation(_prepare_list->max_decree() + 1); + mu->set_client_request(code, request); + init_prepare(mu); +} + +void replica::init_prepare(mutation_ptr& mu) +{ + dassert (PS_PRIMARY == status(), ""); + + error_code err = ERR_SUCCESS; + uint8_t count = 0; + + if (static_cast(_primary_states.membership.secondaries.size()) + 1 < _options.mutation_2pc_min_replica_count) + { + err = ERR_NOT_ENOUGH_MEMBER; + goto ErrOut; + } + + mu->data.header.last_committed_decree = last_committed_decree(); + if (mu->data.header.decree == invalid_decree) + { + mu->set_id(get_ballot(), _prepare_list->max_decree() + 1); + } + else + { + mu->set_id(get_ballot(), mu->data.header.decree); + } + + if 
(mu->data.header.decree > _prepare_list->max_decree() && _prepare_list->count() >= _options.staleness_for_commit) + { + err = ERR_CAPACITY_EXCEEDED; + goto ErrOut; + } + + dassert (mu->data.header.decree > last_committed_decree(), ""); + + // local prepare without log + err = _prepare_list->prepare(mu, PS_PRIMARY); + if (err != ERR_SUCCESS) + { + goto ErrOut; + } + + ddebug("%s: mutation %s init_prepare", name(), mu->name()); + + // + // TODO: bounded staleness on secondaries + // + dassert (mu->data.header.decree <= last_committed_decree() + _options.staleness_for_commit, ""); + + // remote prepare + dassert (mu->remote_tasks().size() == 0, ""); + mu->set_left_secondary_ack_count((unsigned int)_primary_states.membership.secondaries.size()); + for (auto it = _primary_states.membership.secondaries.begin(); it != _primary_states.membership.secondaries.end(); it++) + { + send_prepare_message(*it, PS_SECONDARY, mu, _options.prepare_timeout_ms_for_secondaries); + } + + count = 0; + for (auto it = _primary_states.learners.begin(); it != _primary_states.learners.end(); it++) + { + if (it->second.prepare_start_decree != invalid_decree && mu->data.header.decree >= it->second.prepare_start_decree) + { + send_prepare_message(it->first, PS_POTENTIAL_SECONDARY, mu, _options.prepare_timeout_ms_for_potential_secondaries); + count++; + } + } + mu->set_left_potential_secondary_ack_count(count); + + // local log + dassert (mu->data.header.log_offset == invalid_offset, ""); + dassert (mu->log_task() == nullptr, ""); + mu->log_task() = _stub->_log->append(mu, + LPC_WRITE_REPLICATION_LOG, + this, + std::bind(&replica::on_append_log_completed, this, mu, + std::placeholders::_1, + std::placeholders::_2), + gpid_to_hash(get_gpid()) + ); + + if (nullptr == mu->log_task()) + { + err = ERR_FILE_OPERATION_FAILED; + handle_local_failure(err); + goto ErrOut; + } + + return; + +ErrOut: + response_client_message(mu->client_request, err); + return; +} + +void replica::send_prepare_message(const 
end_point& addr, partition_status status, mutation_ptr& mu, int timeout_milliseconds) +{ + message_ptr msg = message::create_request(RPC_PREPARE, timeout_milliseconds, gpid_to_hash(get_gpid())); + marshall(msg, get_gpid()); + + replica_configuration rconfig; + _primary_states.get_replica_config(status, rconfig); + + marshall(msg, rconfig); + mu->write_to(msg); + + dbg_dassert (mu->remote_tasks().find(addr) == mu->remote_tasks().end()); + + mu->remote_tasks()[addr] = rpc::call(addr, msg, + this, + std::bind(&replica::on_prepare_reply, + this, + std::make_pair(mu, rconfig.status), + std::placeholders::_1, + std::placeholders::_2, + std::placeholders::_3), + gpid_to_hash(get_gpid()) + ); + + ddebug( + "%s: mutation %s send_prepare_message to %s:%d as %s", + name(), mu->name(), + addr.name.c_str(), static_cast(addr.port), + enum_to_string(rconfig.status) + ); +} + +void replica::do_possible_commit_on_primary(mutation_ptr& mu) +{ + dassert (_config.ballot == mu->data.header.ballot, ""); + dassert (PS_PRIMARY == status(), ""); + + if (mu->is_ready_for_commit()) + { + _prepare_list->commit(mu->data.header.decree, false); + + //PerformanceCounters::Decrement(PerfCounters_TwoPhaseCommitOngoing, nullptr); + //PerformanceCounters::Increment(PerfCounters_TwoPhaseCommitQps, nullptr); + + //uint64_t duration =now_ms() - mu->start_time_milliseconds(); + //PerformanceCounters::Set(PerfCounters_TwoPhaseCommitDurationMs, duration, nullptr); + } +} + +void replica::on_prepare(message_ptr& request) +{ + check_hashed_access(); + + replica_configuration rconfig; + unmarshall(request, rconfig); + + mutation_ptr mu = mutation::read_from(request); + decree decree = mu->data.header.decree; + + ddebug( "%s: mutation %s on_prepare", name(), mu->name()); + + dassert (mu->data.header.ballot == rconfig.ballot, ""); + + if (mu->data.header.ballot < get_ballot()) + { + ddebug( "%s: mutation %s on_prepare skipped due to old view", name(), mu->name()); + return; + } + + // update configuration when 
necessary + else if (rconfig.ballot > get_ballot()) + { + update_local_configuration(rconfig); + } + + if (PS_INACTIVE == status() || PS_ERROR == status()) + { + ddebug( + "%s: mutation %s on_prepare to %s skipped", + name(), mu->name(), + enum_to_string(status()) + ); + ack_prepare_message(ERR_INVALID_STATE, mu); + return; + } + + else if (PS_POTENTIAL_SECONDARY == status()) + { + if (_potential_secondary_states.LearningState != LearningWithPrepare && _potential_secondary_states.LearningState != LearningSucceeded) + { + ddebug( + "%s: mutation %s on_prepare to %s skipped, learnings state = %s", + name(), mu->name(), + enum_to_string(status()), + enum_to_string(_potential_secondary_states.LearningState) + ); + + // do not retry as there may retries later + return; + } + } + + dassert (rconfig.status == status(), ""); + if (decree <= last_committed_decree()) + { + ack_prepare_message(ERR_SUCCESS, mu); + return; + } + + // real prepare start + auto mu2 = _prepare_list->get_mutation_by_decree(decree); + if (mu2 != nullptr && mu2->data.header.ballot == mu->data.header.ballot) + { + ddebug( "%s: mutation %s redundant prepare skipped", name(), mu->name()); + + if (mu2->is_prepared()) + { + ack_prepare_message(ERR_SUCCESS, mu); + } + return; + } + + int err = _prepare_list->prepare(mu, status()); + dassert (err == ERR_SUCCESS, ""); + + if (PS_POTENTIAL_SECONDARY == status()) + { + dassert (mu->data.header.decree <= last_committed_decree() + _options.staleness_for_start_prepare_for_potential_secondary, ""); + } + else + { + dassert (PS_SECONDARY == status(), ""); + dassert (mu->data.header.decree <= last_committed_decree() + _options.staleness_for_commit, ""); + } + + // write log + dassert (mu->log_task() == nullptr, ""); + mu->log_task() = _stub->_log->append(mu, + LPC_WRITE_REPLICATION_LOG, + this, + std::bind(&replica::on_append_log_completed, this, mu, std::placeholders::_1, std::placeholders::_2), + gpid_to_hash(get_gpid()) + ); + + if (nullptr == mu->log_task()) + { 
+ err = ERR_FILE_OPERATION_FAILED; + ack_prepare_message(err, mu); + handle_local_failure(err); + } +} + +void replica::on_append_log_completed(mutation_ptr& mu, uint32_t err, uint32_t size) +{ + check_hashed_access(); + + ddebug( "%s: mutation %s on_append_log_completed, err = %u", name(), mu->name(), err); + + if (err == ERR_SUCCESS) + { + mu->set_logged(); + } + + // skip old mutations + if (mu->data.header.ballot < get_ballot() || status() == PS_INACTIVE) + { + return; + } + + switch (status()) + { + case PS_PRIMARY: + if (err == ERR_SUCCESS) + { + do_possible_commit_on_primary(mu); + } + else + { + handle_local_failure(err); + } + break; + case PS_SECONDARY: + case PS_POTENTIAL_SECONDARY: + if (err != ERR_SUCCESS) + { + handle_local_failure(err); + } + ack_prepare_message(err, mu); + break; + case PS_ERROR: + break; + default: + dassert (false, ""); + break; + } +} + +void replica::on_prepare_reply(std::pair pr, int err, message_ptr& request, message_ptr& reply) +{ + check_hashed_access(); + + mutation_ptr& mu = pr.first; + partition_status targetStatus = pr.second; + + // skip callback for old mutations + if (mu->data.header.ballot < get_ballot() || PS_PRIMARY != status()) + return; + + dassert (mu->data.header.ballot == get_ballot(), ""); + + end_point node = request->header().to_address; + partition_status st = _primary_states.GetNodeStatus(node); + + // handle reply + prepare_ack resp; + + // handle error + if (err) + { + resp.err = err; + } + else + { + unmarshall(reply, resp); + + ddebug( + "%s: mutation %s on_prepare_reply from %s:%d", + name(), mu->name(), + node.name.c_str(), static_cast(node.port) + ); + } + + if (resp.err == ERR_SUCCESS) + { + dassert (resp.ballot == get_ballot(), ""); + dassert (resp.decree == mu->data.header.decree, ""); + + switch (targetStatus) + { + case PS_SECONDARY: + dassert (_primary_states.check_exist(node, PS_SECONDARY), ""); + dassert (mu->left_secondary_ack_count() > 0, ""); + if (0 == 
mu->decrease_left_secondary_ack_count()) + { + do_possible_commit_on_primary(mu); + } + break; + case PS_POTENTIAL_SECONDARY: + dassert (mu->left_potential_secondary_ack_count() > 0, ""); + if (0 == mu->decrease_left_potential_secondary_ack_count()) + { + do_possible_commit_on_primary(mu); + } + break; + default: + ddebug( + "%s: mutation %s prepare ack skipped coz the node is now inactive", name(), mu->name() + ); + break; + } + } + + // failure handling + else + { + // note targetStatus and (curent) status may diff + if (targetStatus == PS_POTENTIAL_SECONDARY) + { + dassert (mu->left_potential_secondary_ack_count() > 0, ""); + if (0 == mu->decrease_left_potential_secondary_ack_count()) + { + do_possible_commit_on_primary(mu); + } + } + handle_remote_failure(st, node, resp.err); + } +} + +void replica::ack_prepare_message(int err, mutation_ptr& mu) +{ + prepare_ack resp; + resp.gpid = get_gpid(); + resp.err = err; + resp.ballot = get_ballot(); + resp.decree = mu->data.header.decree; + + // for PS_POTENTIAL_SECONDARY ONLY + resp.last_committed_decree_in_app = _app->last_committed_decree(); + resp.last_committed_decree_in_prepare_list = last_committed_decree(); + + dassert (nullptr != mu->owner_message(), ""); + reply(mu->owner_message(), resp); + + ddebug( "%s: mutation %s ack_prepare_message", name(), mu->name()); +} + +void replica::cleanup_preparing_mutations(bool is_primary) +{ + decree start = last_committed_decree() + 1; + decree end = _prepare_list->max_decree(); + + for (decree decree = start; decree <= end; decree++) + { + mutation_ptr mu = _prepare_list->get_mutation_by_decree(decree); + if (mu != nullptr) + { + int c = mu->clear_prepare_or_commit_tasks(); + if (!is_primary) + { + dassert (0 == c, ""); + } + else + { + ////PerformanceCounters::Decrement(PerfCounters_TwoPhaseCommitOngoing, nullptr); + } + + mu->clear_log_task(); + } + } +} + +}} // namespace diff --git a/src/apps/replication/lib/replica_check.cpp 
b/src/apps/replication/lib/replica_check.cpp new file mode 100644 index 0000000000..0270f11bd1 --- /dev/null +++ b/src/apps/replication/lib/replica_check.cpp @@ -0,0 +1,203 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include "replica.h" +#include "mutation.h" +#include "mutation_log.h" +#include "replica_stub.h" + +#define __TITLE__ "GroupCheck" + +namespace dsn { namespace replication { + +void replica::init_group_check() +{ + if (PS_PRIMARY != status() || _options.group_check_disabled) + return; + + dassert (nullptr == _primary_states.group_check_task, ""); + _primary_states.group_check_task = tasking::enqueue( + LPC_GROUP_CHECK, + this, + &replica::broadcast_group_check, + gpid_to_hash(get_gpid()), + 0, + _options.group_check_internal_ms + ); +} + +void replica::broadcast_group_check() +{ + dassert (nullptr != _primary_states.group_check_task, ""); + if (_primary_states.group_check_pending_replies.size() > 0) + { + dwarn( + "%s: %u group check replies are still pending when doing next round check", + name(), static_cast(_primary_states.group_check_pending_replies.size()) + ); + + for (auto it = _primary_states.group_check_pending_replies.begin(); it != _primary_states.group_check_pending_replies.end(); it++) + { + it->second->cancel(true); + } + _primary_states.group_check_pending_replies.clear(); + } + + for (auto it = _primary_states.statuses.begin(); it != _primary_states.statuses.end(); it++) + { + if (it->first == primary_address()) + continue; + + end_point addr = it->first; + std::shared_ptr request(new group_check_request); + + request->app_type = _primary_states.membership.app_type; + request->node = addr; + _primary_states.get_replica_config(addr, request->config); + request->last_committed_decree = last_committed_decree(); + request->learner_signature = 0; + if (it->second == PS_POTENTIAL_SECONDARY) + { + auto it2 = _primary_states.learners.find(it->first); + dassert (it2 != _primary_states.learners.end(), ""); + request->learner_signature = it2->second.signature; + } + + task_ptr callback_task = rpc::call_typed( + addr, + RPC_GROUP_CHECK, + request, + this, + &replica::on_group_check_reply, + gpid_to_hash(get_gpid()) + ); + + 
_primary_states.group_check_pending_replies[addr] = callback_task; + + ddebug( + "%s: init_group_check for %s:%d", name(), addr.name.c_str(), addr.port + ); + } +} + +void replica::on_group_check(const group_check_request& request, __out_param group_check_response& response) +{ + ddebug( + "%s: on_group_check from %s:%d", + name(), request.config.primary.name.c_str(), request.config.primary.port + ); + + if (request.config.ballot < get_ballot()) + { + response.err = ERR_VERSION_OUTDATED; + return; + } + else if (request.config.ballot > get_ballot()) + { + update_local_configuration(request.config); + } + else if (is_same_ballot_status_change_allowed(status(), request.config.status)) + { + update_local_configuration(request.config, true); + } + + switch (status()) + { + case PS_INACTIVE: + break; + case PS_SECONDARY: + if (request.last_committed_decree > last_committed_decree()) + { + _prepare_list->commit(request.last_committed_decree, true); + } + break; + case PS_POTENTIAL_SECONDARY: + init_learn(request.learner_signature); + break; + case PS_ERROR: + break; + default: + dassert (false, ""); + } + + response.gpid = get_gpid(); + response.node = primary_address(); + response.err = ERR_SUCCESS; + if (status() == PS_ERROR) + { + response.err = ERR_INVALID_STATE; + } + + response.last_committed_decree_in_app = _app->last_committed_decree(); + response.last_committed_decree_in_prepare_list = last_committed_decree(); + response.learner_status_ = _potential_secondary_states.LearningState; + response.learner_signature = _potential_secondary_states.learning_signature; +} + +void replica::on_group_check_reply(error_code err, std::shared_ptr& req, std::shared_ptr& resp) +{ + if (PS_PRIMARY != status() || req->config.ballot < get_ballot()) + { + return; + } + + auto r = _primary_states.group_check_pending_replies.erase(req->node); + dassert (r == 1, ""); + + if (err) + { + handle_remote_failure(req->config.status, req->node, err); + } + else + { + if (resp->err == 
ERR_SUCCESS) + { + if (resp->learner_status_ == LearningSucceeded && req->config.status == PS_POTENTIAL_SECONDARY) + { + handle_learning_succeeded_on_primary(req->node, resp->learner_signature); + } + } + else + { + handle_remote_failure(req->config.status, req->node, resp->err); + } + } +} + +// for testing purpose only +void replica::send_group_check_once_for_test(int delay_milliseconds) +{ + dassert (_options.group_check_disabled, ""); + + _primary_states.group_check_task = tasking::enqueue( + LPC_GROUP_CHECK, + this, + &replica::broadcast_group_check, + gpid_to_hash(get_gpid()), + delay_milliseconds + ); +} + +}} // end namepspace diff --git a/src/apps/replication/lib/replica_config.cpp b/src/apps/replication/lib/replica_config.cpp new file mode 100644 index 0000000000..cd45fe5447 --- /dev/null +++ b/src/apps/replication/lib/replica_config.cpp @@ -0,0 +1,705 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "replica.h" +#include "mutation.h" +#include "mutation_log.h" +#include "replica_stub.h" +#include "replication_failure_detector.h" +#include "rpc_replicated.h" + +#define __TITLE__ "Configuration" + +namespace dsn { namespace replication { + +void replica::on_config_proposal(configuration_update_request& proposal) +{ + check_hashed_access(); + + ddebug( + "%s: on_config_proposal %s for %s:%d", + name(), + enum_to_string(proposal.type), + proposal.node.name.c_str(), static_cast(proposal.node.port) + ); + + if (proposal.config.ballot < get_ballot()) + return; + + if (proposal.config.ballot > get_ballot()) + { + if (update_configuration(proposal.config)) + { + // is closing + return; + } + } + + switch (proposal.type) + { + case CT_ASSIGN_PRIMARY: + assign_primary(proposal); + break; + case CT_ADD_SECONDARY: + add_potential_secondary(proposal); + break; + case CT_DOWNGRADE_TO_SECONDARY: + downgrade_to_secondary_on_primary(proposal); + break; + case CT_DOWNGRADE_TO_INACTIVE: + downgrade_to_inactive_on_primary(proposal); + break; + case CT_REMOVE: + remove(proposal); + break; + default: + dassert (false, ""); + } +} + +void replica::assign_primary(configuration_update_request& proposal) +{ + dassert(proposal.node == primary_address(), ""); + + if (status() == PS_PRIMARY) + { + dwarn( + "%s: invalid assgin primary proposal as the node is in %s", + name(), + enum_to_string(status())); + return; + } + + proposal.config.primary = primary_address(); + replica_helper::remove_node(primary_address(), proposal.config.secondaries); + replica_helper::remove_node(primary_address(), proposal.config.drop_outs); + + update_configuration_on_meta_server(CT_ASSIGN_PRIMARY, proposal.node, 
proposal.config); +} + +void replica::add_potential_secondary(configuration_update_request& proposal) +{ + if (status() != PS_PRIMARY) + { + return; + } + + dassert (proposal.config.ballot == get_ballot(), ""); + dassert (proposal.config.gpid == _primary_states.membership.gpid, ""); + dassert (proposal.config.app_type == _primary_states.membership.app_type, ""); + dassert (proposal.config.primary == _primary_states.membership.primary, ""); + dassert (proposal.config.secondaries == _primary_states.membership.secondaries, ""); + + // zy: work around for meta server bug + if (_primary_states.check_exist(proposal.node, PS_PRIMARY) + || _primary_states.check_exist(proposal.node, PS_SECONDARY)) + return; + + dassert (!_primary_states.check_exist(proposal.node, PS_PRIMARY), ""); + dassert (!_primary_states.check_exist(proposal.node, PS_SECONDARY), ""); + + if (_primary_states.learners.find(proposal.node) != _primary_states.learners.end()) + { + return; + } + + remote_learner_state state; + state.prepare_start_decree = invalid_decree; + state.signature = random64(0, (uint64_t)(-1LL)); + state.timeout_task = nullptr; // TODO: add timer for learner task + + _primary_states.learners[proposal.node] = state; + _primary_states.statuses[proposal.node] = PS_POTENTIAL_SECONDARY; + + group_check_request request; + request.app_type = _primary_states.membership.app_type; + request.node = proposal.node; + _primary_states.get_replica_config(proposal.node, request.config); + request.last_committed_decree = last_committed_decree(); + request.learner_signature = state.signature; + + rpc::call_one_way_typed(proposal.node, RPC_LEARN_ADD_LEARNER, request, gpid_to_hash(get_gpid())); +} + +void replica::upgrade_to_secondary_on_primary(const end_point& node) +{ + ddebug( + "%s: upgrade potential secondary %s:%d to secondary", + name(), + node.name.c_str(), static_cast(node.port) + ); + + partition_configuration newConfig = _primary_states.membership; + + // remove from drop out if there + 
replica_helper::remove_node(node, newConfig.drop_outs); + // add secondary + newConfig.secondaries.push_back(node); + + update_configuration_on_meta_server(CT_UPGRADE_TO_SECONDARY, node, newConfig); +} + +void replica::downgrade_to_secondary_on_primary(configuration_update_request& proposal) +{ + if (proposal.config.ballot != get_ballot() || status() != PS_PRIMARY) + return; + + dassert (proposal.config.gpid == _primary_states.membership.gpid, ""); + dassert (proposal.config.app_type == _primary_states.membership.app_type, ""); + dassert (proposal.config.primary == _primary_states.membership.primary, ""); + dassert (proposal.config.secondaries == _primary_states.membership.secondaries, ""); + dassert (proposal.node == proposal.config.primary, ""); + + proposal.config.primary = dsn::end_point::INVALID; + proposal.config.secondaries.push_back(proposal.node); + + update_configuration_on_meta_server(CT_DOWNGRADE_TO_SECONDARY, proposal.node, proposal.config); +} + + +void replica::downgrade_to_inactive_on_primary(configuration_update_request& proposal) +{ + if (proposal.config.ballot != get_ballot() || status() != PS_PRIMARY) + return; + + dassert (proposal.config.gpid == _primary_states.membership.gpid, ""); + dassert (proposal.config.app_type == _primary_states.membership.app_type, ""); + dassert (proposal.config.primary == _primary_states.membership.primary, ""); + dassert (proposal.config.secondaries == _primary_states.membership.secondaries, ""); + + if (proposal.node == proposal.config.primary) + { + proposal.config.primary = dsn::end_point::INVALID; + } + else + { + auto rt = replica_helper::remove_node(proposal.node, proposal.config.secondaries); + dassert (rt, ""); + } + + proposal.config.drop_outs.push_back(proposal.node); + update_configuration_on_meta_server(CT_DOWNGRADE_TO_INACTIVE, proposal.node, proposal.config); +} + +void replica::remove(configuration_update_request& proposal) +{ + if (proposal.config.ballot != get_ballot() || status() != PS_PRIMARY) + 
return; + + dassert (proposal.config.gpid == _primary_states.membership.gpid, ""); + dassert (proposal.config.app_type == _primary_states.membership.app_type, ""); + dassert (proposal.config.primary == _primary_states.membership.primary, ""); + dassert (proposal.config.secondaries == _primary_states.membership.secondaries, ""); + + auto st = _primary_states.GetNodeStatus(proposal.node); + + switch (st) + { + case PS_PRIMARY: + dassert (proposal.config.primary == proposal.node, ""); + proposal.config.primary = dsn::end_point::INVALID; + break; + case PS_SECONDARY: + { + auto rt = replica_helper::remove_node(proposal.node, proposal.config.secondaries); + dassert (rt, ""); + } + break; + case PS_POTENTIAL_SECONDARY: + { + auto rt = replica_helper::remove_node(proposal.node, proposal.config.drop_outs); + dassert (rt, ""); + } + break; + } + + update_configuration_on_meta_server(CT_REMOVE, proposal.node, proposal.config); +} + +// from primary +void replica::on_remove(const replica_configuration& request) +{ + if (request.ballot < get_ballot()) + return; + + dassert (request.status == PS_INACTIVE, ""); + update_local_configuration(request); +} + +void replica::update_configuration_on_meta_server(config_type type, const end_point& node, partition_configuration& newConfig) +{ + newConfig.last_committed_decree = last_committed_decree(); + + if (type != CT_ASSIGN_PRIMARY) + { + dassert (status() == PS_PRIMARY, ""); + dassert (newConfig.ballot == _primary_states.membership.ballot, ""); + } + + // disable 2pc during reconfiguration + // it is possible to do this only for CT_DOWNGRADE_TO_SECONDARY, + // we therefore choose to disable 2pc during all reconfiguration types + // to achieve consistency at the cost of certain write throughput + update_local_configuration_with_no_ballot_change(PS_INACTIVE); + set_inactive_state_transient(true); + + message_ptr msg = message::create_request(RPC_CM_CALL); + meta_request_header hdr; + hdr.rpc_tag = RPC_CM_UPDATE_PARTITION_CONFIGURATION; 
+ marshall(msg, hdr); + + std::shared_ptr request(new configuration_update_request); + request->config = newConfig; + request->config.ballot++; + request->type = type; + request->node = node; + marshall(msg, *request); + + if (nullptr != _primary_states.reconfiguration_task) + { + _primary_states.reconfiguration_task->cancel(true); + } + + _primary_states.reconfiguration_task = rpc::call_replicated( + _stub->_failure_detector->current_server_contact(), + _stub->_failure_detector->get_servers(), + msg, + this, + std::bind(&replica::on_update_configuration_on_meta_server_reply, this, + std::placeholders::_1, + std::placeholders::_2, + std::placeholders::_3, + request), + gpid_to_hash(get_gpid()) + ); +} + + +void replica::on_update_configuration_on_meta_server_reply(error_code err, message_ptr& request, message_ptr& response, std::shared_ptr req) +{ + check_hashed_access(); + + if (PS_INACTIVE != status() || _stub->is_connected() == false) + { + _primary_states.reconfiguration_task = nullptr; + return; + } + + if (err) + { + ddebug( + "%s: update configuration reply with err %s, request ballot %lld", + name(), + err.to_string(), + req->config.ballot + ); + + _primary_states.reconfiguration_task = rpc::call_replicated( + _stub->_failure_detector->current_server_contact(), + _stub->_failure_detector->get_servers(), + request, + this, + std::bind(&replica::on_update_configuration_on_meta_server_reply, this, + std::placeholders::_1, + std::placeholders::_2, + std::placeholders::_3, + req), + gpid_to_hash(get_gpid()) + ); + return; + } + + configuration_update_response resp; + unmarshall(response, resp); + + ddebug( + "%s: update configuration reply with err %x, ballot %lld, local %lld", + name(), + resp.err, + resp.config.ballot, + get_ballot() + ); + + if (resp.config.ballot < get_ballot()) + { + _primary_states.reconfiguration_task = nullptr; + return; + } + + // post-update work items? 
+ if (resp.err == ERR_SUCCESS) + { + dassert (req->config.gpid == resp.config.gpid, ""); + dassert (req->config.app_type == resp.config.app_type, ""); + dassert (req->config.primary == resp.config.primary, ""); + dassert (req->config.secondaries == resp.config.secondaries, ""); + + switch (req->type) + { + case CT_ASSIGN_PRIMARY: + case CT_DOWNGRADE_TO_SECONDARY: + case CT_DOWNGRADE_TO_INACTIVE: + case CT_UPGRADE_TO_SECONDARY: + break; + case CT_REMOVE: + if (req->node != primary_address()) + { + replica_configuration rconfig; + replica_helper::get_replica_config(resp.config, req->node, rconfig); + rpc::call_one_way_typed(req->node, RPC_REMOVE_REPLICA, rconfig, gpid_to_hash(get_gpid())); + } + break; + default: + dassert (false, ""); + } + } + + update_configuration(resp.config); + _primary_states.reconfiguration_task = nullptr; +} + +bool replica::update_configuration(const partition_configuration& config) +{ + dassert (config.ballot >= get_ballot(), ""); + + replica_configuration rconfig; + replica_helper::get_replica_config(config, primary_address(), rconfig); + + if (rconfig.status == PS_PRIMARY && + (rconfig.ballot > get_ballot() || status() != PS_PRIMARY) + ) + { + _primary_states.reset_membership(config, config.primary != primary_address()); + } + + if (config.ballot > get_ballot() || + is_same_ballot_status_change_allowed(status(), rconfig.status) + ) + { + return update_local_configuration(rconfig, true); + } + else + return false; +} + +bool replica::is_same_ballot_status_change_allowed(partition_status olds, partition_status news) +{ + return + // add learner + (olds == PS_INACTIVE && news == PS_POTENTIAL_SECONDARY) + + // learner ready for secondary + || (olds == PS_POTENTIAL_SECONDARY && news == PS_SECONDARY) + + // meta server come back + || (olds == PS_INACTIVE && news == PS_SECONDARY && _inactive_is_transient) + + // meta server come back + || (olds == PS_INACTIVE && news == PS_PRIMARY && _inactive_is_transient) + + // no change + || (olds == news) 
+ ; +} + +bool replica::update_local_configuration(const replica_configuration& config, bool same_ballot/* = false*/) +{ + dassert(config.ballot > get_ballot() + || (same_ballot && config.ballot == get_ballot()), ""); + dassert (config.gpid == get_gpid(), ""); + + partition_status old_status = status(); + ballot old_ballot = get_ballot(); + + // skip unncessary configuration change + if (old_status == config.status && old_ballot == config.ballot) + return false; + + // skip invalid change + switch (old_status) + { + case PS_ERROR: + { + ddebug( + "%s: status change from %s @ %lld to %s @ %lld is not allowed", + name(), + enum_to_string(old_status), + old_ballot, + enum_to_string(config.status), + config.ballot + ); + return false; + } + break; + case PS_INACTIVE: + if ((config.status == PS_PRIMARY || config.status == PS_SECONDARY) + && !_inactive_is_transient) + { + ddebug( + "%s: status change from %s @ %lld to %s @ %lld is not allowed when inactive state is not transient", + name(), + enum_to_string(old_status), + old_ballot, + enum_to_string(config.status), + config.ballot + ); + return false; + } + break; + case PS_POTENTIAL_SECONDARY: + if (config.status == PS_ERROR || config.status == PS_INACTIVE) + { + if (!_potential_secondary_states.cleanup(false)) + { + dwarn( + "%s: status change from %s @ %lld to %s @ %lld is not allowed coz learning remote state is still running", + name(), + enum_to_string(old_status), + old_ballot, + enum_to_string(config.status), + config.ballot + ); + return false; + } + } + break; + } + + uint64_t oldTs = _last_config_change_time_ms; + _config = config; + _last_config_change_time_ms =now_ms(); + dassert (max_prepared_decree() >= last_committed_decree(), ""); + + switch (old_status) + { + case PS_PRIMARY: + cleanup_preparing_mutations(true); + switch (config.status) + { + case PS_PRIMARY: + replay_prepare_list(); + break; + case PS_INACTIVE: + _primary_states.cleanup(old_ballot != config.ballot); + break; + case PS_SECONDARY: + 
case PS_ERROR: + _primary_states.cleanup(); + break; + case PS_POTENTIAL_SECONDARY: + dassert (false, "invalid execution path"); + break; + default: + dassert (false, "invalid execution path"); + } + break; + case PS_SECONDARY: + switch (config.status) + { + case PS_PRIMARY: + init_group_check(); + replay_prepare_list(); + break; + case PS_SECONDARY: + break; + case PS_POTENTIAL_SECONDARY: + // InActive in config + break; + case PS_INACTIVE: + break; + case PS_ERROR: + break; + default: + dassert (false, "invalid execution path"); + } + break; + case PS_POTENTIAL_SECONDARY: + switch (config.status) + { + case PS_PRIMARY: + dassert (false, "invalid execution path"); + break; + case PS_SECONDARY: + _prepare_list->truncate(_app->last_committed_decree()); + _potential_secondary_states.cleanup(true); + break; + case PS_POTENTIAL_SECONDARY: + break; + case PS_INACTIVE: + _potential_secondary_states.cleanup(true); + break; + case PS_ERROR: + _prepare_list->reset(_app->last_committed_decree()); + _potential_secondary_states.cleanup(true); + break; + default: + dassert (false, "invalid execution path"); + } + break; + case PS_INACTIVE: + switch (config.status) + { + case PS_PRIMARY: + _inactive_is_transient = false; + init_group_check(); + replay_prepare_list(); + break; + case PS_SECONDARY: + _inactive_is_transient = false; + break; + case PS_POTENTIAL_SECONDARY: + _inactive_is_transient = false; + break; + case PS_INACTIVE: + break; + case PS_ERROR: + _inactive_is_transient = false; + break; + default: + dassert (false, "invalid execution path"); + } + break; + case PS_ERROR: + switch (config.status) + { + case PS_PRIMARY: + dassert (false, "invalid execution path"); + break; + case PS_SECONDARY: + dassert (false, "invalid execution path"); + break; + case PS_POTENTIAL_SECONDARY: + dassert(false, "invalid execution path"); + break; + case PS_INACTIVE: + dassert (false, "invalid execution path"); + break; + case PS_ERROR: + break; + default: + dassert (false, "invalid 
execution path"); + } + break; + default: + dassert (false, "invalid execution path"); + } + + if (status() != old_status) + { + ddebug( + "%s: status change %s @ %lld => %s @ %lld, pre(%llu, %llu), app(%llu, %llu), duration=%llu ms", + name(), + enum_to_string(old_status), + old_ballot, + enum_to_string(status()), + get_ballot(), + _prepare_list->max_decree(), + _prepare_list->last_committed_decree(), + _app->last_committed_decree(), + _app->last_durable_decree(), + _last_config_change_time_ms - oldTs + ); + + bool isClosing = (status() == PS_ERROR || (status() == PS_INACTIVE && get_ballot() > old_ballot)); + _stub->notify_replica_state_update(config, isClosing); + + if (isClosing) + { + ddebug("%s: being close ...", name()); + _stub->begin_close_replica(this); + return true; + } + } + else + { + _stub->notify_replica_state_update(config, false); + } + return false; +} + +bool replica::update_local_configuration_with_no_ballot_change(partition_status s) +{ + if (status() == s) + return false; + + auto config = _config; + config.status = s; + return update_local_configuration(config, true); +} + +void replica::on_config_sync(const partition_configuration& config) +{ + ddebug( "%s: configuration sync", name()); + + // no update during reconfiguration + if (nullptr != _primary_states.reconfiguration_task) + return; + + // no outdated update + if (config.ballot < get_ballot()) + return; + + update_configuration(config); +} + +void replica::replay_prepare_list() +{ + decree start = last_committed_decree() + 1; + decree end = _prepare_list->max_decree(); + + ddebug( + "%s: replay prepare list from %lld to %lld, ballot = %lld", + name(), + start, + end, + get_ballot() + ); + + for (decree decree = start; decree <= end; decree++) + { + mutation_ptr old = _prepare_list->get_mutation_by_decree(decree); + mutation_ptr mu = new_mutation(decree); + + if (old != nullptr) + { + mu->data.updates = old->data.updates; + mu->client_request = old->client_request; + + dbg_dassert 
(mu->data.updates.size() == old->data.updates.size()); + } + else + { + ddebug( + "%s: emit empty mutation %s when replay prepare list", + name(), + mu->name() + ); + } + + init_prepare(mu); + } +} + +}} // namespace diff --git a/src/apps/replication/lib/replica_context.cpp b/src/apps/replication/lib/replica_context.cpp new file mode 100644 index 0000000000..32a14f8f71 --- /dev/null +++ b/src/apps/replication/lib/replica_context.cpp @@ -0,0 +1,180 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include "replica_context.h" + +namespace dsn { namespace replication { + +void primary_context::cleanup(bool clean_pending_mutations) +{ + do_cleanup_pending_mutations(clean_pending_mutations); + + // clean up group check + if (nullptr != group_check_task) + { + group_check_task->cancel(true); + group_check_task = nullptr; + } + + for (auto it = group_check_pending_replies.begin(); it != group_check_pending_replies.end(); it++) + { + it->second->cancel(true); + } + group_check_pending_replies.clear(); + + // clean up reconfiguration + if (nullptr != reconfiguration_task) + { + reconfiguration_task->cancel(true); + reconfiguration_task = nullptr; + } +} + +void primary_context::do_cleanup_pending_mutations(bool clean_pending_mutations) +{ + if (pending_mutation_task != nullptr) + { + pending_mutation_task->cancel(true); + pending_mutation_task = nullptr; + } + + if (clean_pending_mutations) + { + pending_mutation = nullptr; + } +} + +void primary_context::reset_membership(const partition_configuration& config, bool clear_learners) +{ + statuses.clear(); + if (clear_learners) + { + learners.clear(); + } + + membership = config; + + if (membership.primary != dsn::end_point::INVALID) + { + statuses[membership.primary] = PS_PRIMARY; + } + + for (auto it = config.secondaries.begin(); it != config.secondaries.end(); it++) + { + statuses[*it] = PS_SECONDARY; + learners.erase(*it); + } + + for (auto it = learners.begin(); it != learners.end(); it++) + { + statuses[it->first] = PS_POTENTIAL_SECONDARY; + } + + for (auto it = config.drop_outs.begin(); it != config.drop_outs.end(); it++) + { + if (statuses.find(*it) == statuses.end()) + { + statuses[*it] = PS_INACTIVE; + } + } +} + +bool primary_context::get_replica_config(const end_point& node, __out_param replica_configuration& config) +{ + config.gpid = membership.gpid; + config.primary = membership.primary; + config.ballot = membership.ballot; + + auto it = statuses.find(node); + if (it != statuses.end()) + { + 
config.status = it->second; + return true; + } + else + { + config.status = PS_INACTIVE; + return false; + } +} + + +void primary_context::get_replica_config(partition_status st, __out_param replica_configuration& config) +{ + config.gpid = membership.gpid; + config.primary = membership.primary; + config.ballot = membership.ballot; + config.status = st; +} + +bool primary_context::check_exist(const end_point& node, partition_status st) +{ + switch (st) + { + case PS_PRIMARY: + return membership.primary == node; + case PS_SECONDARY: + return std::find(membership.secondaries.begin(), membership.secondaries.end(), node) != membership.secondaries.end(); + case PS_POTENTIAL_SECONDARY: + return learners.find(node) != learners.end(); + case PS_INACTIVE: + return std::find(membership.drop_outs.begin(), membership.drop_outs.end(), node) != membership.drop_outs.end(); + default: + dassert (false, ""); + return false; + } +} + +bool potential_secondary_context::cleanup(bool force) +{ + if (learn_remote_files_task != nullptr) + { + bool clean_remote_learning = learn_remote_files_task->cancel(false); + if (force) + { + learn_remote_files_task->cancel(true); + } + else if (!clean_remote_learning) + { + return false; + } + } + + if (learning_task != nullptr) + { + learning_task->cancel(true); + } + + if (learn_remote_files_completed_task != nullptr) + { + learn_remote_files_completed_task->cancel(true); + } + + learning_signature = 0; + learning_round_is_running = false; + return true; +} + +}} // end namespace diff --git a/src/apps/replication/lib/replica_context.h b/src/apps/replication/lib/replica_context.h new file mode 100644 index 0000000000..8418ec8c3e --- /dev/null +++ b/src/apps/replication/lib/replica_context.h @@ -0,0 +1,104 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and 
associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#pragma once + +#include "mutation.h" + +namespace dsn { namespace replication { + +struct remote_learner_state +{ + uint64_t signature; + task_ptr timeout_task; + decree prepare_start_decree; +}; + +typedef std::map learner_map; + +class primary_context +{ +public: + void cleanup(bool clean_pending_mutations = true); + + void reset_membership(const partition_configuration& config, bool clear_learners); + bool get_replica_config(const end_point& node, __out_param replica_configuration& config); + void get_replica_config(partition_status status, __out_param replica_configuration& config); + bool check_exist(const end_point& node, partition_status status); + partition_status GetNodeStatus(const end_point& addr) const; + + void do_cleanup_pending_mutations(bool clean_pending_mutations = true); + void CleanupGroupCheck(); + +public: + // membership mgr, including learners + partition_configuration membership; + NodeStatusMap statuses; + learner_map learners; + + // 2pc batching + mutation_ptr 
pending_mutation; + task_ptr pending_mutation_task; + + // group check + task_ptr group_check_task; + node_tasks group_check_pending_replies; + + // reconfig + task_ptr reconfiguration_task; +}; + + +class potential_secondary_context +{ +public: + potential_secondary_context() : + learning_signature(0), + learning_round_is_running(false), + LearningState(learner_status::Learning_INVALID) + {} + bool cleanup(bool force); + +public: + uint64_t learning_signature; + learner_status LearningState; + volatile bool learning_round_is_running; + + task_ptr learning_task; + task_ptr learn_remote_files_task; + task_ptr learn_remote_files_completed_task; + + +}; + +//---------------inline impl---------------------------------------------------------------- + +inline partition_status primary_context::GetNodeStatus(const end_point& addr) const +{ + auto it = statuses.find(addr); + return it != statuses.end() ? it->second : PS_INACTIVE; +} + +}} // end namespace diff --git a/src/apps/replication/lib/replica_failover.cpp b/src/apps/replication/lib/replica_failover.cpp new file mode 100644 index 0000000000..5a9f5eaf47 --- /dev/null +++ b/src/apps/replication/lib/replica_failover.cpp @@ -0,0 +1,112 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "replica.h" +#include "mutation.h" +#include "mutation_log.h" +#include "replica_stub.h" + +#define __TITLE__ "FailOver" + +namespace dsn { namespace replication { + +void replica::handle_local_failure(int error) +{ + ddebug( + "%s: handle local failure error %x, status = %s", + name(), + error, + enum_to_string(status()) + ); + + if (status() == PS_PRIMARY) + { + _stub->remove_replica_on_meta_server(_primary_states.membership); + } + + update_local_configuration_with_no_ballot_change(PS_ERROR); +} + +void replica::handle_remote_failure(partition_status st, const end_point& node, int error) +{ + ddebug( + "%s: handle remote failure error %u, status = %s, node = %s:%d", + name(), + error, + enum_to_string(st), + node.name.c_str(), static_cast(node.port) + ); + + dassert (status() == PS_PRIMARY, ""); + dassert(node != primary_address(), ""); + + switch (st) + { + case PS_SECONDARY: + dassert (_primary_states.check_exist(node, PS_SECONDARY), ""); + { + configuration_update_request request; + request.node = node; + request.type = CT_DOWNGRADE_TO_INACTIVE; + request.config = _primary_states.membership; + downgrade_to_inactive_on_primary(request); + } + break; + case PS_POTENTIAL_SECONDARY: + // potential secondary failure does not lead to ballot change + // therefore, it is possible to have multiple exec here + if (_primary_states.learners.erase(node) > 0) + { + if (_primary_states.check_exist(node, PS_INACTIVE)) + _primary_states.statuses[node] = PS_INACTIVE; + else + 
_primary_states.statuses.erase(node); + } + + break; + case PS_INACTIVE: + case PS_ERROR: + break; + default: + dassert (false, ""); + break; + } +} + +void replica::on_meta_server_disconnected() +{ + ddebug( "%s: meta server disconnected", name()); + + auto old_status = status(); + update_local_configuration_with_no_ballot_change(PS_INACTIVE); + + // make sure they can be back directly + if (old_status == PS_PRIMARY || old_status == PS_SECONDARY) + { + set_inactive_state_transient(true); + } +} + +}} // namespace diff --git a/src/apps/replication/lib/replica_init.cpp b/src/apps/replication/lib/replica_init.cpp new file mode 100644 index 0000000000..6cb255b698 --- /dev/null +++ b/src/apps/replication/lib/replica_init.cpp @@ -0,0 +1,200 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include "replica.h" +#include "mutation.h" +#include "mutation_log.h" +#include "replica_stub.h" +#include +#include + +#define __TITLE__ "init" + +namespace dsn { namespace replication { + +using namespace dsn::service; + +int replica::initialize_on_new(const char* app_type, global_partition_id gpid) +{ + char buffer[256]; + sprintf(buffer, "%u.%u.%s", gpid.app_id, gpid.pidx, app_type); + + _config.gpid = gpid; + _dir = _stub->dir() + "/" + buffer; + + if (boost::filesystem::exists(_dir)) + { + return ERR_PATH_ALREADY_EXIST; + } + + boost::filesystem::create_directory(_dir); + + int err = init_app_and_prepare_list(app_type, true); + dassert (err == ERR_SUCCESS, ""); + return err; +} + +/*static*/ replica* replica::newr(replica_stub* stub, const char* app_type, global_partition_id gpid, replication_options& options) +{ + replica* rep = new replica(stub, gpid, options); + if (ERR_SUCCESS == rep->initialize_on_new(app_type, gpid)) + return rep; + else + { + delete rep; + return nullptr; + } +} + +int replica::initialize_on_load(const char* dir, bool renameDirOnFailure) +{ + std::string dr(dir); + auto pos = dr.find_last_of('/'); + if (pos == std::string::npos) + { + derror( "invalid replica dir %s", dir); + return ERR_PATH_NOT_FOUND; + } + + char app_type[128]; + global_partition_id gpid; + std::string name = dr.substr(pos + 1); + if (3 != sscanf(name.c_str(), "%u.%u.%s", &gpid.app_id, &gpid.pidx, app_type)) + { + derror( "invalid replica dir %s", dir); + return ERR_PATH_NOT_FOUND; + } + + _config.gpid = gpid; + _dir = dr; + + int err = init_app_and_prepare_list(app_type, false); + + if (ERR_SUCCESS != err && renameDirOnFailure) + { + // GCed later + char newPath[256]; + sprintf(newPath, "%s.%x.err", dir, random32(0, (uint32_t)-1)); + boost::filesystem::remove_all(newPath); + boost::filesystem::rename(dir, newPath); + derror( "move bad replica from '%s' to '%s'", dir, newPath); + } + + return err; +} + + +/*static*/ replica* replica::load(replica_stub* stub, 
const char* dir, replication_options& options, bool renameDirOnFailure) +{ + replica* rep = new replica(stub, options); + int err = rep->initialize_on_load(dir, renameDirOnFailure); + if (err != ERR_SUCCESS) + { + delete rep; + return nullptr; + } + else + { + return rep; + } +} + +int replica::init_app_and_prepare_list(const char* app_type, bool create_new) +{ + dassert (nullptr == _app, ""); + + _app = ::dsn::utils::factory_store::create(app_type, PROVIDER_TYPE_MAIN, this, _stub->config()); + if (nullptr == _app) + { + return ERR_OBJECT_NOT_FOUND; + } + dassert (nullptr != _app, ""); + + int err = _app->open(create_new); + if (ERR_SUCCESS == err) + { + dassert (_app->last_durable_decree() == _app->last_committed_decree(), ""); + _prepare_list->reset(_app->last_committed_decree()); + } + else + { + derror( "open replica '%s' under '%s' failed, err = %x", app_type, dir().c_str(), err); + delete _app; + _app = nullptr; + } + + sprintf(_name, "%u.%u @ %s:%d", _config.gpid.app_id, _config.gpid.pidx, primary_address().name.c_str(), + static_cast(primary_address().port)); + + return err; +} + +void replica::replay_mutation(mutation_ptr& mu) +{ + if (mu->data.header.decree <= last_committed_decree() || + mu->data.header.ballot < get_ballot()) + return; + + if (mu->data.header.ballot > get_ballot()) + { + _config.ballot = mu->data.header.ballot; + update_local_configuration(_config); + } + + // prepare + /*ddebug( + "%u.%u @ %s:%d: replay mutation ballot = %llu, decree = %llu, last_committed_decree = %llu", + get_gpid().app_id, get_gpid().pidx, + address().name.c_str(), static_castaddress().port, + mu->data.header.ballot, + mu->data.header.decree, + mu->data.header.last_committed_decree + );*/ + + int err = _prepare_list->prepare(mu, PS_INACTIVE); + dassert (err == ERR_SUCCESS, ""); +} + +void replica::set_inactive_state_transient(bool t) +{ + if (status() == PS_INACTIVE) + { + _inactive_is_transient = t; + } +} + +void replica::reset_prepare_list_after_replay() +{ + if 
(_prepare_list->min_decree() > _app->last_committed_decree() + 1) + { + _prepare_list->reset(_app->last_committed_decree()); + } + else + { + _prepare_list->truncate(_app->last_committed_decree()); + } +} + +}} // namespace diff --git a/src/apps/replication/lib/replica_learn.cpp b/src/apps/replication/lib/replica_learn.cpp new file mode 100644 index 0000000000..54c2fe2e54 --- /dev/null +++ b/src/apps/replication/lib/replica_learn.cpp @@ -0,0 +1,389 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include "replica.h" +#include "mutation.h" +#include "mutation_log.h" +#include "replica_stub.h" +#include + +#define __TITLE__ "Learn" + +namespace dsn { namespace replication { + +void replica::init_learn(uint64_t signature) +{ + check_hashed_access(); + + dassert (status() == PS_POTENTIAL_SECONDARY, ""); + + // at most one learning task running + if (_potential_secondary_states.learning_round_is_running || !signature) + return; + + if (signature != _potential_secondary_states.learning_signature) + { + _potential_secondary_states.cleanup(true); + _potential_secondary_states.learning_signature = signature; + _potential_secondary_states.LearningState = LearningWithoutPrepare; + _prepare_list->reset(_app->last_committed_decree()); + } + else + { + switch (_potential_secondary_states.LearningState) + { + case LearningSucceeded: + notify_learn_completion(); + return; + case LearningFailed: + break; + case LearningWithPrepare: + if (_app->last_durable_decree() >= last_committed_decree()) + { + _potential_secondary_states.LearningState = LearningSucceeded; + notify_learn_completion(); + return; + } + break; + case LearningWithoutPrepare: + break; + default: + dassert (false, ""); + } + } + + _potential_secondary_states.learning_round_is_running = true; + + std::shared_ptr request(new learn_request); + request->gpid = get_gpid(); + request->last_committed_decree_in_app = _app->last_committed_decree(); + request->last_committed_decree_in_prepare_list = _prepare_list->last_committed_decree(); + request->learner = primary_address(); + request->signature = _potential_secondary_states.learning_signature; + _app->prepare_learning_request(request->app_specific_learn_request); + + _potential_secondary_states.learning_task = rpc::call_typed( + _config.primary, + RPC_LEARN, + request, + this, + &replica::on_learn_reply, + gpid_to_hash(get_gpid()) + ); + + ddebug( + "%s: init_learn with lastAppC/DDecree = <%llu,%llu>, lastCDecree = %llu, learnState = %s", + name(), + 
_app->last_committed_decree(), + _app->last_durable_decree(), + last_committed_decree(), + enum_to_string(_potential_secondary_states.LearningState) + ); +} + +void replica::on_learn(const learn_request& request, __out_param learn_response& response) +{ + check_hashed_access(); + + if (PS_PRIMARY != status()) + { + response.err = ERR_INVALID_STATE; + return; + } + + if (request.last_committed_decree_in_app > last_committed_decree()) + { + ddebug( + "%s: on_learn %s:%d, learner state is lost due to DDD, with its appCommittedDecree = %llu vs localCommitedDecree %llu", + name(), + request.learner.name.c_str(), static_cast(request.learner.port), + request.last_committed_decree_in_app, + last_committed_decree() + ); + ((learn_request&)request).last_committed_decree_in_app = 0; + } + + _primary_states.get_replica_config(request.learner, response.config); + + auto it = _primary_states.learners.find(request.learner); + if (it == _primary_states.learners.end()) + { + response.err = (response.config.status == PS_SECONDARY ? 
ERR_SUCCESS : ERR_OBJECT_NOT_FOUND); + return; + } + else if (it->second.signature != request.signature) + { + response.err = ERR_OBJECT_NOT_FOUND; + return; + } + + ddebug( + "%s: on_learn %s:%d with its appCommittedDecree = %llu vs localCommitedDecree %llu", + name(), + request.learner.name.c_str(), static_cast(request.learner.port), + request.last_committed_decree_in_app, + last_committed_decree() + ); + + response.prepare_start_decree = invalid_decree; + response.commit_decree = last_committed_decree(); + response.err = ERR_SUCCESS; + + if (request.last_committed_decree_in_app + _options.staleness_for_start_prepare_for_potential_secondary >= last_committed_decree()) + { + if (it->second.prepare_start_decree == invalid_decree) + { + it->second.prepare_start_decree = last_committed_decree() + 1; + + cleanup_preparing_mutations(true); + replay_prepare_list(); + + ddebug( + "%s: on_learn with prepare_start_decree = %llu for %s:%d", + name(), + last_committed_decree() + 1, + request.learner.name.c_str(), static_cast(request.learner.port) + ); + } + + response.prepare_start_decree = it->second.prepare_start_decree; + } + else + { + it->second.prepare_start_decree = invalid_decree; + } + + decree decree = request.last_committed_decree_in_app + 1; + response.err = _app->get_learn_state(decree, request.app_specific_learn_request, response.state); + + response.base_local_dir = _dir; + for (auto itr = response.state.files.begin(); itr != response.state.files.end(); ++itr) + *itr = itr->substr(_dir.length()); +} + +void replica::on_learn_reply(error_code err, std::shared_ptr& req, std::shared_ptr& resp) +{ + check_hashed_access(); + + dassert (PS_POTENTIAL_SECONDARY == status(), ""); + dassert (req->signature == _potential_secondary_states.learning_signature, ""); + + if (resp == nullptr) + { + handle_learning_error(ERR_TIMEOUT); + return; + } + + ddebug( + "%s: on_learn_reply with err = 0x%x, prepare_start_decree = %llu, current learnState = %s", + name(), resp->err, 
resp->prepare_start_decree, enum_to_string(_potential_secondary_states.LearningState) + ); + + if (resp->err != ERR_SUCCESS) + { + handle_learning_error(resp->err); + return; + } + + if (resp->config.ballot > get_ballot()) + { + update_local_configuration(resp->config); + } + + if (status() != PS_POTENTIAL_SECONDARY) + { + return; + } + + if (resp->prepare_start_decree != invalid_decree && _potential_secondary_states.LearningState == LearningWithoutPrepare) + { + _potential_secondary_states.LearningState = LearningWithPrepare; + _prepare_list->reset(resp->prepare_start_decree - 1); + } + + _potential_secondary_states.learn_remote_files_task = tasking::enqueue( + LPC_LEARN_REMOTE_DELTA_FILES, + this, + std::bind(&replica::on_learn_remote_state, this, resp) + ); +} + +void replica::on_learn_remote_state(std::shared_ptr resp) +{ + int err = ERR_SUCCESS; + + // + // TODO: copy files using data bus service instead + // + learn_state localState; + localState.meta = resp->state.meta; + + end_point& server = resp->config.primary; + + if (!resp->state.files.empty()) + { + file::copy_remote_files(server, resp->base_local_dir, resp->state.files, _dir, true, LPC_AIO_TEST, nullptr, nullptr); + } + + if (err == ERR_SUCCESS) + { + for (auto itr = resp->state.files.begin(); itr != resp->state.files.end(); ++itr) + { + std::string file; + if (dir().back() == '/' || itr->front() == '/') + file = dir() + *itr; + else + file = dir() + '/' + *itr; + + localState.files.push_back(file); + } + + // the only place where there is non-in-partition-thread update + decree oldDecree = _app->last_committed_decree(); + + err = _app->apply_learn_state(resp->state); + + ddebug( + "%s: learning %d files to %s, err = %x, " + "appCommit(%llu => %llu), durable(%llu), remoteC(%llu), prepStart(%llu), state(%s)", + name(), + resp->state.files.size(), _dir.c_str(), err, + oldDecree, _app->last_committed_decree(), + _app->last_durable_decree(), + resp->commit_decree, + resp->prepare_start_decree, + 
enum_to_string(_potential_secondary_states.LearningState) + ); + + if (err == ERR_SUCCESS && _app->last_committed_decree() >= resp->commit_decree) + { + err = _app->flush(true); + if (err == ERR_SUCCESS) + { + dassert (_app->last_committed_decree() == _app->last_durable_decree(), ""); + } + } + } + else + { + derror( + "%s: Transfer %d files to %s failed, err = %d", + name(), + resp->state.files.size(), _dir.c_str(), err); + } + + _potential_secondary_states.learn_remote_files_completed_task = tasking::enqueue( + LPC_LEARN_REMOTE_DELTA_FILES_COMPLETED, + this, + std::bind(&replica::on_learn_remote_state_completed, this, err), + gpid_to_hash(get_gpid()) + ); +} + +void replica::on_learn_remote_state_completed(int err) +{ + check_hashed_access(); + + if (PS_POTENTIAL_SECONDARY != status()) + return; + + _potential_secondary_states.learning_round_is_running = false; + + if (err != ERR_SUCCESS) + { + handle_learning_error(err); + } + else + { + // continue + init_learn(_potential_secondary_states.learning_signature); + } +} + +void replica::handle_learning_error(int err) +{ + check_hashed_access(); + + dwarn( + "%s: learning failed with err = 0x%X, LastCommitted = %lld", + name(), + err, + _app->last_committed_decree() + ); + + _potential_secondary_states.cleanup(true); + _potential_secondary_states.LearningState = LearningFailed; + + update_local_configuration_with_no_ballot_change(PS_ERROR); +} + +void replica::handle_learning_succeeded_on_primary(const end_point& node, uint64_t learnSignature) +{ + auto it = _primary_states.learners.find(node); + if (it != _primary_states.learners.end() && it->second.signature == learnSignature) + upgrade_to_secondary_on_primary(node); +} + +void replica::notify_learn_completion() +{ + group_check_response report; + report.gpid = get_gpid(); + report.err = ERR_SUCCESS; + report.last_committed_decree_in_app = _app->last_committed_decree(); + report.last_committed_decree_in_prepare_list = last_committed_decree(); + 
report.learner_signature = _potential_secondary_states.learning_signature; + report.learner_status_ = _potential_secondary_states.LearningState; + report.node = primary_address(); + + rpc::call_one_way_typed(_config.primary, RPC_LEARN_COMPLETITION_NOTIFY, report, gpid_to_hash(get_gpid())); +} + +void replica::on_learn_completion_notification(const group_check_response& report) +{ + check_hashed_access(); + if (status() != PS_PRIMARY) + return; + + if (report.learner_status_ == LearningSucceeded) + { + handle_learning_succeeded_on_primary(report.node, report.learner_signature); + } +} + +void replica::on_add_learner(const group_check_request& request) +{ + if (request.config.ballot < get_ballot()) + return; + + if (request.config.ballot > get_ballot() + || is_same_ballot_status_change_allowed(status(), request.config.status)) + { + update_local_configuration(request.config, true); + dassert(PS_POTENTIAL_SECONDARY == status(), ""); + init_learn(request.learner_signature); + } +} + +}} // namespace diff --git a/src/apps/replication/lib/replica_stub.cpp b/src/apps/replication/lib/replica_stub.cpp new file mode 100644 index 0000000000..5070f97361 --- /dev/null +++ b/src/apps/replication/lib/replica_stub.cpp @@ -0,0 +1,943 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "replica.h" +#include "replica_stub.h" +#include "mutation_log.h" +#include "mutation.h" +#include "replication_failure_detector.h" +#include "rpc_replicated.h" +#include + +#define __TITLE__ "Stub" + +namespace dsn { namespace replication { + +using namespace dsn::service; + +replica_stub::replica_stub(replica_state_subscriber subscriber /*= nullptr*/, bool is_long_subscriber/* = true*/) + : serverlet("replica_stub") +{ + _replica_state_subscriber = subscriber; + _is_long_subscriber = is_long_subscriber; + _failure_detector = nullptr; + _state = NS_Disconnected; +} + +replica_stub::~replica_stub(void) +{ + close(); +} + +void replica_stub::initialize(configuration_ptr config, bool clear/* = false*/) +{ + replication_options opts; + opts.initialize(config); + initialize(opts, config, clear); +} + +void replica_stub::initialize(const replication_options& opts, configuration_ptr config, bool clear/* = false*/) +{ + zauto_lock l(_repicas_lock); + + _config = config; + + // init perf counters + //PerformanceCounters::init(PerfCounters_ReplicationBegin, PerfCounters_ReplicationEnd); + + // init dirs + set_options(opts); + _dir = _options.working_dir; + if (clear) + { + boost::filesystem::remove_all(_dir); + } + + if (!boost::filesystem::exists(_dir)) + { + boost::filesystem::create_directory(_dir); + } + + _dir = boost::filesystem::canonical(boost::filesystem::path(_dir)).string(); + std::string logDir = _dir + "/log"; + if (!boost::filesystem::exists(logDir)) + { + 
boost::filesystem::create_directory(logDir); + } + + // init rps + boost::filesystem::directory_iterator endtr; + replicas rps; + + for (boost::filesystem::directory_iterator it(dir()); + it != endtr; + ++it) + { + auto name = it->path().string(); + if (name.length() >= 4 && + (name.substr(name.length() - strlen("log")) == "log" || + name.substr(name.length() - strlen(".err")) == ".err") + ) + continue; + + auto r = replica::load(this, name.c_str(), _options, true); + if (r != nullptr) + { + ddebug( "%u.%u @ %s:%d: load replica success with durable decree = %llu from '%s'", + r->get_gpid().app_id, r->get_gpid().pidx, + primary_address().name.c_str(), static_cast(primary_address().port), + r->last_durable_decree(), + name.c_str() + ); + rps[r->get_gpid()] = r; + } + } + + // init logs + _log = new mutation_log(opts.log_buffer_size_mb, opts.log_pending_max_ms, opts.log_file_size_mb, opts.log_batch_write, opts.log_max_concurrent_writes); + int err = _log->initialize(logDir.c_str()); + dassert (err == ERR_SUCCESS, ""); + + err = _log->replay( + std::bind(&replica_stub::replay_mutation, this, std::placeholders::_1, &rps) + ); + + for (auto it = rps.begin(); it != rps.end(); it++) + { + it->second->reset_prepare_list_after_replay(); + + derror( + "%u.%u @ %s:%d: initialized durable = %lld, committed = %llu, maxpd = %llu, ballot = %llu", + it->first.app_id, it->first.pidx, + primary_address().name.c_str(), static_cast(primary_address().port), + it->second->last_durable_decree(), + it->second->last_committed_decree(), + it->second->max_prepared_decree(), + it->second->get_ballot() + ); + + it->second->set_inactive_state_transient(err == ERR_SUCCESS); + } + + // start log serving + if (false == _options.gc_disabled) + { + _gc_timer_task = tasking::enqueue( + LPC_GARBAGE_COLLECT_LOGS_AND_REPLICAS, + this, + &replica_stub::on_gc, + 0, + random32(0, _options.gc_interval_ms), + _options.gc_interval_ms + ); + } + + multi_partition_decrees initMaxDecrees; // for log truncate + 
for (auto it = rps.begin(); it != rps.end(); it++) + { + initMaxDecrees[it->second->get_gpid()] = it->second->max_prepared_decree(); + } + err = _log->start_write_service(initMaxDecrees, _options.staleness_for_commit); + dassert (err == ERR_SUCCESS, ""); + + // attach rps + _replicas = rps; + + rps.clear(); + + // start timer for configuration sync + if (!_options.config_sync_disabled) + { + _config_sync_timer_task = tasking::enqueue( + LPC_QUERY_CONFIGURATION_ALL, + this, + &replica_stub::query_configuration_by_node, + 0, + _options.config_sync_interval_ms, + _options.config_sync_interval_ms + ); + } + + // init livenessmonitor + dassert (NS_Disconnected == _state, ""); + if (_options.fd_disabled == false) + { + _failure_detector = new replication_failure_detector(this, _options.meta_servers); + _failure_detector->start( + _options.fd_check_interval_seconds, + _options.fd_beacon_interval_seconds, + _options.fd_lease_seconds, + _options.fd_grace_seconds + ); + _failure_detector->register_master(_failure_detector->current_server_contact()); + } + else + { + _state = NS_Connected; + } +} + +void replica_stub::replay_mutation(mutation_ptr& mu, replicas* rps) +{ + auto it = rps->find(mu->data.header.gpid); + if (it != rps->end()) + { + it->second->replay_mutation(mu); + } +} + +replica_ptr replica_stub::get_replica(global_partition_id gpid, bool new_when_possible, const char* app_type) +{ + zauto_lock l(_repicas_lock); + auto it = _replicas.find(gpid); + if (it != _replicas.end()) + return it->second; + else + { + if (!new_when_possible) + return nullptr; + else + { + dassert (app_type, ""); + replica* rep = replica::newr(this, app_type, gpid, _options); + if (rep != nullptr) + { + add_replica(rep); + } + return rep; + } + } +} + +replica_ptr replica_stub::get_replica(int32_t app_id, int32_t partition_index) +{ + global_partition_id gpid; + gpid.app_id = app_id; + gpid.pidx = partition_index; + return get_replica(gpid); +} + +void 
replica_stub::get_primary_replica_list(uint32_t p_tableID, std::vector& p_repilcaList) +{ + zauto_lock l(_repicas_lock); + for (auto it = _replicas.begin(); it != _replicas.end(); it++) + { + if (it->second->status() == PS_PRIMARY + && (p_tableID == (uint32_t)-1 + || it->second->get_gpid().app_id == static_cast(p_tableID) )) + { + p_repilcaList.push_back(it->second->get_gpid()); + } + } +} + +void replica_stub::on_client_write(message_ptr& request) +{ + write_request_header hdr; + unmarshall(request, hdr); + + replica_ptr rep = get_replica(hdr.gpid); + if (rep != nullptr) + { + //PerformanceCounters::Increment(PerfCounters_TotalClientWriteQps, nullptr); + rep->on_client_write(hdr.code, request); + } + else + { + response_client_error(request, ERR_OBJECT_NOT_FOUND); + } +} + +void replica_stub::on_client_read(message_ptr& request) +{ + read_request_header req; + unmarshall(request, req); + + replica_ptr rep = get_replica(req.gpid); + if (rep != nullptr) + { + //PerformanceCounters::Increment(PerfCounters_TotalClientReadQps, nullptr); + rep->on_client_read(req, request); + } + else + { + response_client_error(request, ERR_OBJECT_NOT_FOUND); + } +} + +void replica_stub::on_config_proposal(const configuration_update_request& proposal) +{ + if (!is_connected()) return; + + replica_ptr rep = get_replica(proposal.config.gpid, proposal.type == CT_ASSIGN_PRIMARY, proposal.config.app_type.c_str()); + if (rep == nullptr && proposal.type == CT_ASSIGN_PRIMARY) + { + begin_open_replica(proposal.config.app_type, proposal.config.gpid); + } + + if (rep != nullptr) + { + rep->on_config_proposal((configuration_update_request&)proposal); + } +} + +void replica_stub::on_query_decree(const query_replica_decree_request& req, __out_param query_replica_decree_response& resp) +{ + replica_ptr rep = get_replica(req.gpid); + if (rep != nullptr) + { + resp.err = ERR_SUCCESS; + if (PS_POTENTIAL_SECONDARY == rep->status()) + { + resp.last_decree = 0; + } + else + { + resp.last_decree = 
rep->last_committed_decree(); + // TODO: use the following to alleviate data lost + //resp.last_decree = rep->last_prepared_decree(); + } + } + else + { + resp.err = ERR_OBJECT_NOT_FOUND; + resp.last_decree = 0; + } +} + +void replica_stub::on_prepare(message_ptr& request) +{ + global_partition_id gpid; + unmarshall(request, gpid); + replica_ptr rep = get_replica(gpid); + if (rep != nullptr) + { + rep->on_prepare(request); + } + else + { + prepare_ack resp; + resp.gpid = gpid; + resp.err = ERR_OBJECT_NOT_FOUND; + reply(request, resp); + } +} + +void replica_stub::on_group_check(const group_check_request& request, __out_param group_check_response& response) +{ + if (!is_connected()) return; + + replica_ptr rep = get_replica(request.config.gpid, request.config.status == PS_POTENTIAL_SECONDARY, request.app_type.c_str()); + if (rep != nullptr) + { + rep->on_group_check(request, response); + } + else + { + if (request.config.status == PS_POTENTIAL_SECONDARY) + { + std::shared_ptr req(new group_check_request); + *req = request; + + begin_open_replica(request.app_type, request.config.gpid, req); + response.err = ERR_SUCCESS; + response.learner_signature = 0; + } + else + { + response.err = ERR_OBJECT_NOT_FOUND; + } + } +} + +void replica_stub::on_learn(const learn_request& request, __out_param learn_response& response) +{ + replica_ptr rep = get_replica(request.gpid); + if (rep != nullptr) + { + rep->on_learn(request, response); + } + else + { + response.err = ERR_OBJECT_NOT_FOUND; + } +} + +void replica_stub::on_learn_completion_notification(const group_check_response& report) +{ + replica_ptr rep = get_replica(report.gpid); + if (rep != nullptr) + { + rep->on_learn_completion_notification(report); + } +} + +void replica_stub::on_add_learner(const group_check_request& request) +{ + replica_ptr rep = get_replica(request.config.gpid, true, request.app_type.c_str()); + if (rep != nullptr) + { + rep->on_add_learner(request); + } + else + { + std::shared_ptr req(new 
group_check_request); + *req = request; + begin_open_replica(request.app_type, request.config.gpid, req); + } +} + +void replica_stub::on_remove(const replica_configuration& request) +{ + replica_ptr rep = get_replica(request.gpid); + if (rep != nullptr) + { + rep->on_remove(request); + } +} + +void replica_stub::query_configuration_by_node() +{ + if (_state == NS_Disconnected) + { + return; + } + + if (_config_query_task != nullptr) + { + _config_query_task->cancel(false); + } + + message_ptr msg = message::create_request(RPC_CM_CALL); + + meta_request_header hdr; + hdr.rpc_tag = RPC_CM_QUERY_NODE_PARTITIONS; + marshall(msg, hdr); + + configuration_query_by_node_request req; + req.node = primary_address(); + marshall(msg, req); + + _config_query_task = rpc::call_replicated( + _failure_detector->current_server_contact(), + _failure_detector->get_servers(), + msg, + this, + std::bind(&replica_stub::on_node_query_reply, this, + std::placeholders::_1, + std::placeholders::_2, + std::placeholders::_3 + ) + ); +} + +void replica_stub::on_meta_server_connected() +{ + ddebug( + "%s:%d: meta server connected", + primary_address().name.c_str(), static_cast(primary_address().port) + ); + + zauto_lock l(_repicas_lock); + if (_state == NS_Disconnected) + { + _state = NS_Connecting; + query_configuration_by_node(); + } +} + +void replica_stub::on_node_query_reply(int err, message_ptr& request, message_ptr& response) +{ + ddebug( + "%s:%d: node view replied", + primary_address().name.c_str(), static_cast(primary_address().port) + ); + + if (response == nullptr) + { + zauto_lock l(_repicas_lock); + if (_state == NS_Connecting) + { + query_configuration_by_node(); + } + } + else + { + zauto_lock l(_repicas_lock); + if (_state == NS_Connecting) + { + _state = NS_Connected; + } + + // DO NOT UPDATE STATE WHEN DISCONNECTED + if (_state != NS_Connected) + return; + + configuration_query_by_node_response resp; + + unmarshall(response, resp); + + replicas rs = _replicas; + for (auto it 
= resp.partitions.begin(); it != resp.partitions.end(); it++) + { + rs.erase(it->gpid); + tasking::enqueue( + LPC_QUERY_NODE_CONFIGURATION_SCATTER, + this, + std::bind(&replica_stub::on_node_query_reply_scatter, this, this, *it), + gpid_to_hash(it->gpid) + ); + } + + // for rps not exist on meta_servers + for (auto it = rs.begin(); it != rs.end(); it++) + { + tasking::enqueue( + LPC_QUERY_NODE_CONFIGURATION_SCATTER, + this, + std::bind(&replica_stub::on_node_query_reply_scatter2, this, this, it->first), + gpid_to_hash(it->first) + ); + } + } +} + +void replica_stub::set_meta_server_connected_for_test(const configuration_query_by_node_response& resp) +{ + zauto_lock l(_repicas_lock); + dassert (_state != NS_Connected, ""); + _state = NS_Connected; + + for (auto it = resp.partitions.begin(); it != resp.partitions.end(); it++) + { + tasking::enqueue( + LPC_QUERY_NODE_CONFIGURATION_SCATTER, + this, + std::bind(&replica_stub::on_node_query_reply_scatter, this, this, *it), + gpid_to_hash(it->gpid) + ); + } +} + +// this_ is used to hold a ref to replica_stub so we don't need to cancel the task on replica_stub::close +void replica_stub::on_node_query_reply_scatter(replica_stub_ptr this_, const partition_configuration& config) +{ + replica_ptr replica = get_replica(config.gpid); + if (replica != nullptr) + { + replica->on_config_sync(config); + } + else + { + + ddebug( + "%u.%u @ %s:%d: replica not exists on replica server, remove it from meta server", + config.gpid.app_id, config.gpid.pidx, + primary_address().name.c_str(), static_cast(primary_address().port) + ); + + remove_replica_on_meta_server(config); + } +} + +void replica_stub::on_node_query_reply_scatter2(replica_stub_ptr this_, global_partition_id gpid) +{ + replica_ptr replica = get_replica(gpid); + if (replica != nullptr) + { + ddebug( + "%u.%u @ %s:%d: replica not exists on meta server, removed", + gpid.app_id, gpid.pidx, + primary_address().name.c_str(), static_cast(primary_address().port) + ); + 
replica->update_local_configuration_with_no_ballot_change(PS_ERROR); + } +} + +void replica_stub::remove_replica_on_meta_server(const partition_configuration& config) +{ + message_ptr msg = message::create_request(RPC_CM_CALL); + meta_request_header hdr; + hdr.rpc_tag = RPC_CM_UPDATE_PARTITION_CONFIGURATION; + marshall(msg, hdr); + + std::shared_ptr request(new configuration_update_request); + request->config = config; + request->config.ballot++; + request->node = primary_address(); + request->type = CT_DOWNGRADE_TO_INACTIVE; + + if (primary_address() == config.primary) + { + request->config.primary = dsn::end_point::INVALID; + } + else if (replica_helper::remove_node(primary_address(), request->config.secondaries)) + { + } + else + { + return; + } + + marshall(msg, *request); + + rpc::call_replicated( + _failure_detector->current_server_contact(), + _failure_detector->get_servers(), + msg, + nullptr, + nullptr + ); +} + +void replica_stub::on_meta_server_disconnected() +{ + ddebug( + "%s:%d: meta server disconnected", + primary_address().name.c_str(), static_cast(primary_address().port) + ); + zauto_lock l(_repicas_lock); + if (NS_Disconnected == _state) + return; + + _state = NS_Disconnected; + + for (auto it = _replicas.begin(); it != _replicas.end(); it++) + { + tasking::enqueue( + LPC_CM_DISCONNECTED_SCATTER, + this, + std::bind(&replica_stub::on_meta_server_disconnected_scatter, this, this, it->first), + gpid_to_hash(it->first) + ); + } +} + +// this_ is used to hold a ref to replica_stub so we don't need to cancel the task on replica_stub::close +void replica_stub::on_meta_server_disconnected_scatter(replica_stub_ptr this_, global_partition_id gpid) +{ + { + zauto_lock l(_repicas_lock); + if (_state != NS_Disconnected) + return; + } + + replica_ptr replica = get_replica(gpid); + if (replica != nullptr) + { + replica->on_meta_server_disconnected(); + } +} + +void replica_stub::response_client_error(message_ptr& request, int error) +{ + message_ptr resp = 
request->create_response(); + resp->writer().write(error); + rpc::reply(resp); +} + +void replica_stub::init_gc_for_test() +{ + dassert (_options.gc_disabled, ""); + + _gc_timer_task = tasking::enqueue( + LPC_GARBAGE_COLLECT_LOGS_AND_REPLICAS, + this, + &replica_stub::on_gc, + 0, + _options.gc_interval_ms + ); +} + +void replica_stub::on_gc() +{ + replicas rs; + { + zauto_lock l(_repicas_lock); + rs = _replicas; + } + + // gc log + multi_partition_decrees durable_decrees; + for (auto it = rs.begin(); it != rs.end(); it++) + { + durable_decrees[it->first] = it->second->last_durable_decree(); + } + _log->garbage_collection(durable_decrees); + + // gc on-disk rps + boost::filesystem::directory_iterator endtr; + for (boost::filesystem::directory_iterator it(dir()); + it != endtr; + ++it) + { + auto name = it->path().filename().string(); + if (name.length() > strlen(".err") && name.substr(name.length() - strlen(".err")) == ".err") + { + std::time_t mt = boost::filesystem::last_write_time(it->path()); + if (mt > time(0) + _options.gc_disk_error_replica_interval_seconds) + { + boost::filesystem::remove_all(_dir + "/" + name); + } + } + } +} + +task_ptr replica_stub::begin_open_replica(const std::string& app_type, global_partition_id gpid, std::shared_ptr req) +{ + _repicas_lock.lock(); + if (_replicas.find(gpid) != _replicas.end()) + { + _repicas_lock.unlock(); + return nullptr; + } + + auto it = _opening_replicas.find(gpid); + if (it != _opening_replicas.end()) + { + _repicas_lock.unlock(); + return nullptr; + } + else + { + auto it2 = _closing_replicas.find(gpid); + if (it2 != _closing_replicas.end()) + { + if (it2->second.second->status() == PS_INACTIVE + && it2->second.first->cancel(false)) + { + replica_ptr r = it2->second.second; + _closing_replicas.erase(it2); + add_replica(r); + + // unlock here to avoid dead lock + _repicas_lock.unlock(); + + ddebug( "open replica which is to be closed '%s.%u.%u'", app_type.c_str(), gpid.app_id, gpid.pidx); + + if (req != 
nullptr)
+                {
+                    on_add_learner(*req);
+                }
+                return nullptr;
+            }
+            else
+            {
+                _repicas_lock.unlock();
+                dwarn( "open replica '%s.%u.%u' failed coz replica is under closing",
+                    app_type.c_str(), gpid.app_id, gpid.pidx);
+                return nullptr;
+            }
+        }
+        else
+        {
+            auto task = tasking::enqueue(LPC_OPEN_REPLICA, this, std::bind(&replica_stub::open_replica, this, app_type, gpid, req));
+            _opening_replicas[gpid] = task;
+            _repicas_lock.unlock();
+            return task;
+        }
+    }
+}
+
+// Task body scheduled by begin_open_replica: loads the replica stored in
+// "<dir>/<app_id>.<pidx>.<app_type>", or creates a new one when loading
+// fails, registers it in _replicas and clears its _opening_replicas entry.
+// When req is set, the pending add-learner request is then re-sent to this
+// node itself through RPC_LEARN_ADD_LEARNER.
+//
+// app_type is deliberately taken by value: the function is invoked through
+// std::bind from an asynchronous task.
+void replica_stub::open_replica(const std::string app_type, global_partition_id gpid, std::shared_ptr<group_check_request> req)
+{
+    // Build the directory name with std::string instead of sprintf into a
+    // fixed char[256], which overflowed for long application type names.
+    // The casts keep the "%u" rendering of the two ids.
+    std::string dr = dir() + "/"
+        + std::to_string(static_cast<unsigned int>(gpid.app_id)) + "."
+        + std::to_string(static_cast<unsigned int>(gpid.pidx)) + "."
+        + app_type;
+
+    dwarn("open replica '%s'", dr.c_str());
+
+    replica_ptr rep = replica::load(this, dr.c_str(), _options, true);
+    if (rep == nullptr) rep = replica::newr(this, app_type.c_str(), gpid, _options);
+    dassert (rep != nullptr, "");
+
+    {
+        zauto_lock l(_repicas_lock);
+        auto it = _replicas.find(gpid);
+        dassert (it == _replicas.end(), "");
+        add_replica(rep);
+        _opening_replicas.erase(gpid);
+    }
+
+    if (nullptr != req)
+    {
+        rpc::call_one_way_typed(primary_address(), RPC_LEARN_ADD_LEARNER, *req, gpid_to_hash(req->config.gpid));
+    }
+}
+
+task_ptr replica_stub::begin_close_replica(replica_ptr r)
+{
+    zauto_lock l(_repicas_lock);
+
+    // initialization is still ongoing
+    if (nullptr == _failure_detector)
+        return nullptr;
+
+    if (remove_replica(r))
+    {
+        auto task = tasking::enqueue(LPC_CLOSE_REPLICA, this,
+            std::bind(&replica_stub::close_replica, this, r),
+            0,
+            r->status() == PS_ERROR ? 
0 : _options.gc_memory_replica_interval_ms + ); + _closing_replicas[r->get_gpid()] = std::make_pair(task, r); + return task; + } + else + { + return nullptr; + } +} + +void replica_stub::close_replica(replica_ptr r) +{ + dwarn( "close replica '%s'", r->dir().c_str()); + + r->close(); + + { + zauto_lock l(_repicas_lock); + _closing_replicas.erase(r->get_gpid()); + } +} + +void replica_stub::add_replica(replica_ptr r) +{ + zauto_lock l(_repicas_lock); + _replicas[r->get_gpid()] = r; +} + +bool replica_stub::remove_replica(replica_ptr r) +{ + zauto_lock l(_repicas_lock); + if (_replicas.erase(r->get_gpid()) > 0) + { + return true; + } + else + return false; +} + +void replica_stub::notify_replica_state_update(const replica_configuration& config, bool isClosing) +{ + if (nullptr != _replica_state_subscriber) + { + if (_is_long_subscriber) + { + tasking::enqueue(LPC_REPLICA_STATE_CHANGE_NOTIFICATION, this, std::bind(_replica_state_subscriber, primary_address(), config, isClosing)); + } + else + { + _replica_state_subscriber(primary_address(), config, isClosing); + } + } +} + +void replica_stub::open_service() +{ + register_rpc_handler(RPC_REPLICATION_CLIENT_WRITE, "write", &replica_stub::on_client_write); + register_rpc_handler(RPC_REPLICATION_CLIENT_READ, "read", &replica_stub::on_client_read); + + register_rpc_handler(RPC_CONFIG_PROPOSAL, "ProposeConfig", &replica_stub::on_config_proposal); + + register_rpc_handler(RPC_PREPARE, "prepare", &replica_stub::on_prepare); + register_rpc_handler(RPC_LEARN, "Learn", &replica_stub::on_learn); + register_rpc_handler(RPC_LEARN_COMPLETITION_NOTIFY, "LearnNotify", &replica_stub::on_learn_completion_notification); + register_rpc_handler(RPC_LEARN_ADD_LEARNER, "LearnAdd", &replica_stub::on_add_learner); + register_rpc_handler(RPC_REMOVE_REPLICA, "remove", &replica_stub::on_remove); + register_rpc_handler(RPC_GROUP_CHECK, "GroupCheck", &replica_stub::on_group_check); + register_rpc_handler(RPC_QUERY_PN_DECREE, "query_decree", 
&replica_stub::on_query_decree);
+}
+
+// Shuts the stub down: cancels the periodic tasks, waits for replicas that
+// are being closed, cancels replicas that are being opened, closes all
+// remaining replicas, then stops and destroys the failure detector and the
+// mutation log.
+void replica_stub::close()
+{
+    if (_config_sync_timer_task != nullptr)
+    {
+        _config_sync_timer_task->cancel(true);
+        _config_sync_timer_task = nullptr;
+    }
+
+    if (_config_query_task != nullptr)
+    {
+        _config_query_task->cancel(true);
+        _config_query_task = nullptr;
+    }
+    _state = NS_Disconnected;
+
+    if (_gc_timer_task != nullptr)
+    {
+        _gc_timer_task->cancel(true);
+        _gc_timer_task = nullptr;
+    }
+
+    {
+        zauto_lock l(_repicas_lock);
+        while (_closing_replicas.empty() == false)
+        {
+            // Remember the key, not just the task: close_replica() erases its
+            // own entry once it has run, so after the wait begin() may denote
+            // a different replica - or end() when this was the last one.
+            auto gpid = _closing_replicas.begin()->first;
+            auto task = _closing_replicas.begin()->second.first;
+
+            // the close task takes _repicas_lock itself, so release it while waiting
+            _repicas_lock.unlock();
+
+            task->wait();
+
+            _repicas_lock.lock();
+            _closing_replicas.erase(gpid);
+        }
+
+        while (_opening_replicas.empty() == false)
+        {
+            // same reasoning as above: open_replica() removes its own entry
+            // when it gets to run before the cancel takes effect
+            auto gpid = _opening_replicas.begin()->first;
+            auto task = _opening_replicas.begin()->second;
+            _repicas_lock.unlock();
+
+            task->cancel(true);
+
+            _repicas_lock.lock();
+            _opening_replicas.erase(gpid);
+        }
+
+        while (_replicas.empty() == false)
+        {
+            _replicas.begin()->second->close();
+            _replicas.erase(_replicas.begin());
+        }
+    }
+
+    if (_failure_detector != nullptr)
+    {
+        _failure_detector->stop();
+        delete _failure_detector;
+        _failure_detector = nullptr;
+    }
+
+    if (_log != nullptr)
+    {
+        _log->close();
+        delete _log;
+        _log = nullptr;
+    }
+}
+
+}} // namespace
+
diff --git a/src/apps/replication/lib/replica_stub.h b/src/apps/replication/lib/replica_stub.h
new file mode 100644
index 0000000000..76e5b549a1
--- /dev/null
+++ b/src/apps/replication/lib/replica_stub.h
@@ -0,0 +1,170 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015 Microsoft Corporation
+ *
+ * -=- Robust Distributed System Nucleus (rDSN) -=-
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#pragma once + +// +// the replica_stub is the *singleton* entry to +// access all replica managed in the same process +// replica_stub(singleton) --> replica --> replication_app +// + +#include "replication_common.h" + +namespace dsn { namespace replication { + +class mutation_log; +class replication_failure_detector; + +// from, new replica config, isClosing +typedef std::function replica_state_subscriber; + +class replica_stub : public serverlet, public ref_object +{ +public: + replica_stub(replica_state_subscriber subscriber = nullptr, bool is_long_subscriber = true); + ~replica_stub(void); + + // + // initialization + // + void initialize(const replication_options& opts, configuration_ptr config, bool clear = false); + void initialize(configuration_ptr config, bool clear = false); + void set_options(const replication_options& opts) { _options = opts; } + void open_service(); + void close(); + + // + // requests from clients + // + void on_client_write(message_ptr& request); + void on_client_read(message_ptr& request); + + // + // messages from meta server + // + void on_config_proposal(const configuration_update_request& proposal); + void on_query_decree(const query_replica_decree_request& req, 
__out_param query_replica_decree_response& resp); + + // + // messages from peers (primary or secondary) + // - prepare + // - commit + // - learn + // + void on_prepare(message_ptr& request); + void on_learn(const learn_request& request, __out_param learn_response& response); + void on_learn_completion_notification(const group_check_response& report); + void on_add_learner(const group_check_request& request); + void on_remove(const replica_configuration& request); + void on_group_check(const group_check_request& request, __out_param group_check_response& response); + + // + // local messages + // + void on_meta_server_connected(); + void on_meta_server_disconnected(); + void on_gc(); + + // + // routines published for test + // + void init_gc_for_test(); + void set_meta_server_disconnected_for_test() { on_meta_server_disconnected(); } + void set_meta_server_connected_for_test(const configuration_query_by_node_response& config); + + // + // common routines for inquiry + // + const std::string& dir() const { return _dir; } + replica_ptr get_replica(global_partition_id gpid, bool new_when_possible = false, const char* app_type = nullptr); + replica_ptr get_replica(int32_t app_id, int32_t partition_index); + replication_options& options() { return _options; } + configuration_ptr config() const { return _config; } + bool is_connected() const { return NS_Connected == _state; } + + // p_tableID = MAX_UInt32 for replica of all tables. 
+ void get_primary_replica_list(uint32_t p_tableID, std::vector& p_repilcaList); + +private: + enum ReplicaNodeState + { + NS_Disconnected, + NS_Connecting, + NS_Connected + }; + + void query_configuration_by_node(); + void on_meta_server_disconnected_scatter(replica_stub_ptr this_, global_partition_id gpid); + void on_node_query_reply(int err, message_ptr& request, message_ptr& response); + void on_node_query_reply_scatter(replica_stub_ptr this_, const partition_configuration& config); + void on_node_query_reply_scatter2(replica_stub_ptr this_, global_partition_id gpid); + void remove_replica_on_meta_server(const partition_configuration& config); + task_ptr begin_open_replica(const std::string& app_type, global_partition_id gpid, std::shared_ptr req = nullptr); + void open_replica(const std::string app_type, global_partition_id gpid, std::shared_ptr req); + task_ptr begin_close_replica(replica_ptr r); + void close_replica(replica_ptr r); + void add_replica(replica_ptr r); + bool remove_replica(replica_ptr r); + void notify_replica_state_update(const replica_configuration& config, bool isClosing); + +private: + typedef std::map replicas; + typedef std::map opening_replicas; + typedef std::map> closing_replicas; // + + zlock _repicas_lock; + replicas _replicas; + opening_replicas _opening_replicas; + closing_replicas _closing_replicas; + + mutation_log *_log; + std::string _dir; + + replication_failure_detector *_failure_detector; + volatile ReplicaNodeState _state; + + // constants + replication_options _options; + configuration_ptr _config; + replica_state_subscriber _replica_state_subscriber; + bool _is_long_subscriber; + + // temproal states + task_ptr _config_query_task; + task_ptr _config_sync_timer_task; + task_ptr _gc_timer_task; + +private: + friend class replica; + void response_client_error(message_ptr& request, int error); + void replay_mutation(mutation_ptr& mu, replicas* rps); +}; + +DEFINE_REF_OBJECT(replica_stub) + +//------------ inline impl 
---------------------- + +}} // namespace diff --git a/src/apps/replication/lib/replication_admission_controller.cpp b/src/apps/replication/lib/replication_admission_controller.cpp new file mode 100644 index 0000000000..c553a32a40 --- /dev/null +++ b/src/apps/replication/lib/replication_admission_controller.cpp @@ -0,0 +1,54 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include "replication_admission_controller.h" + +namespace dsn { namespace replication { + +replication_admission_controller::replication_admission_controller(task_queue* q, std::vector& sargs) + : admission_controller(q, sargs) +{ +} + +replication_admission_controller::~replication_admission_controller(void) +{ +} + +bool replication_admission_controller::is_task_accepted(task_ptr& task) +{ + if (task->code() != RPC_REPLICATION_CLIENT_WRITE && task->code() != RPC_REPLICATION_CLIENT_READ) + return true; + + // read latency + + return true; +} + +int replication_admission_controller::get_system_utilization() +{ + return 0; +} + +}} // end namespace diff --git a/src/apps/replication/lib/replication_admission_controller.h b/src/apps/replication/lib/replication_admission_controller.h new file mode 100644 index 0000000000..4a7a25d976 --- /dev/null +++ b/src/apps/replication/lib/replication_admission_controller.h @@ -0,0 +1,49 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#pragma once + +#include +using namespace dsn::service; +#include + +namespace dsn { namespace replication { + +class replication_admission_controller : + public admission_controller +{ +public: + replication_admission_controller(task_queue* q, std::vector& sargs); + ~replication_admission_controller(void); + +private: + virtual bool is_task_accepted(task_ptr& task); + virtual int get_system_utilization(); + +private: + +}; + +}} // end namespace diff --git a/src/apps/replication/lib/replication_app_base.cpp b/src/apps/replication/lib/replication_app_base.cpp new file mode 100644 index 0000000000..fd14ed503a --- /dev/null +++ b/src/apps/replication/lib/replication_app_base.cpp @@ -0,0 +1,92 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "replica.h" +#include "mutation.h" +#include + +#define __TITLE__ "TwoPhaseCommit" + +namespace dsn { namespace replication { + +void register_replica_provider(replica_app_factory f, const char* name) +{ + ::dsn::utils::factory_store::register_factory(name, f, PROVIDER_TYPE_MAIN); +} + +replication_app_base::replication_app_base(replica* replica, configuration_ptr& config) +{ + _dir = replica->dir(); + _replica = replica; + _last_committed_decree = _last_durable_decree = 0; +} + +int replication_app_base::write_internal(mutation_ptr& mu, bool ack_client) +{ + dassert (mu->data.header.decree == last_committed_decree() + 1, ""); + + int err = 0; + auto& msg = mu->client_request; + dispatch_rpc_call( + mu->rpc_code, + msg, + ack_client + ); + + ++_last_committed_decree; + return err; +} + +int replication_app_base::dispatch_rpc_call(int code, message_ptr& request, bool ack_client) +{ + auto it = _handlers.find(code); + if (it != _handlers.end()) + { + if (ack_client) + { + message_ptr response = request->create_response(); + int err = 0; + marshall(response->writer(), err); + it->second(request, response); + } + else + { + message_ptr response(nullptr); + it->second(request, response); + } + } + else if (ack_client) + { + message_ptr response = request->create_response(); + error_code err = ERR_HANDLER_NOT_FOUND; + marshall(response->writer(), (int)err); + rpc::reply(response); + } + + return 0; +} + +}} // end namespace diff --git a/src/apps/replication/lib/replication_failure_detector.cpp b/src/apps/replication/lib/replication_failure_detector.cpp new file mode 100644 index 0000000000..fe0e579a94 --- /dev/null +++ b/src/apps/replication/lib/replication_failure_detector.cpp @@ 
-0,0 +1,141 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include "replication_failure_detector.h" +#include "replica_stub.h" + +namespace dsn { namespace replication { + + +replication_failure_detector::replication_failure_detector(replica_stub* stub, std::vector& meta_servers) +{ + _stub = stub; + _meta_servers = meta_servers; + _current_meta_server = _meta_servers[random32(0, 100) % _meta_servers.size()]; +} + +replication_failure_detector::~replication_failure_detector(void) +{ + +} + +end_point replication_failure_detector::find_next_meta_server(end_point current) +{ + if (end_point::INVALID == current) + return _meta_servers[random32(0, 100) % _meta_servers.size()]; + else + { + auto it = std::find(_meta_servers.begin(), _meta_servers.end(), current); + dassert (it != _meta_servers.end(), ""); + it++; + if (it != _meta_servers.end()) + return *it; + else + return _meta_servers.at(0); + } +} + +void replication_failure_detector::end_ping(::dsn::error_code err, const fd::beacon_ack& ack, void* context) +{ + failure_detector::end_ping(err, ack, context); + + zauto_lock l(_meta_lock); + + if (ack.this_node == _current_meta_server) + { + if (err) + { + end_point node = find_next_meta_server(ack.this_node); + if (ack.this_node != node) + { + switch_master(ack.this_node, node); + } + } + else if (ack.is_master == false) + { + if (end_point::INVALID != ack.primary_node) + { + switch_master(ack.this_node, ack.primary_node); + } + } + } + + else + { + if (err) + { + // nothing to do + } + else if (ack.is_master == false) + { + if (end_point::INVALID != ack.primary_node) + { + switch_master(ack.this_node, ack.primary_node); + } + } + else + { + _current_meta_server = ack.this_node; + } + } +} + +// client side +void replication_failure_detector::on_master_disconnected( const std::vector& nodes ) +{ + bool primaryDisconnected = false; + + { + zauto_lock l(_meta_lock); + for (auto it = nodes.begin(); it != nodes.end(); it++) + { + if (_current_meta_server == *it) + primaryDisconnected = true; + } + } + + if 
(primaryDisconnected) + { + _stub->on_meta_server_disconnected(); + } +} + +void replication_failure_detector::on_master_connected( const end_point& node) +{ + bool is_primary = false; + + { + zauto_lock l(_meta_lock); + is_primary = (node == _current_meta_server); + } + + if (is_primary) + { + _stub->on_meta_server_connected(); + } +} + +}} // end namespace + diff --git a/src/apps/replication/lib/replication_failure_detector.h b/src/apps/replication/lib/replication_failure_detector.h new file mode 100644 index 0000000000..c24949cc60 --- /dev/null +++ b/src/apps/replication/lib/replication_failure_detector.h @@ -0,0 +1,67 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#pragma once + +#include "replication_common.h" +# include + +namespace dsn { namespace replication { + +class replica_stub; +class replication_failure_detector : public dsn::fd::failure_detector +{ +public: + replication_failure_detector(replica_stub* stub, std::vector& meta_servers); + ~replication_failure_detector(void); + + virtual void end_ping(::dsn::error_code err, const fd::beacon_ack& ack, void* context); + + // client side + virtual void on_master_disconnected( const std::vector& nodes ); + virtual void on_master_connected( const end_point& node); + + // server side + virtual void on_worker_disconnected( const std::vector& nodes ) { dassert (false, ""); } + virtual void on_worker_connected( const end_point& node ) { dassert (false, ""); } + + end_point current_server_contact() const { zauto_lock l(_meta_lock); return _current_meta_server; } + std::vector get_servers() const { zauto_lock l(_meta_lock); return _meta_servers; } + +private: + end_point find_next_meta_server(end_point current); + +private: + typedef std::set end_points; + + mutable zlock _meta_lock; + end_point _current_meta_server; + + std::vector _meta_servers; + replica_stub *_stub; +}; + +}} // end namespace + diff --git a/src/apps/replication/lib/replication_service_app.cpp b/src/apps/replication/lib/replication_service_app.cpp new file mode 100644 index 0000000000..c920739f64 --- /dev/null +++ b/src/apps/replication/lib/replication_service_app.cpp @@ -0,0 +1,63 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the 
Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +# include "replication_common.h" +# include "replica_stub.h" + +namespace dsn { namespace replication { + +replication_service_app::replication_service_app(service_app_spec* s) + : service_app(s) +{ + _stub = nullptr; +} + +replication_service_app::~replication_service_app(void) +{ +} + +error_code replication_service_app::start(int argc, char** argv) +{ + replication_options opts; + opts.initialize(system::config()); + opts.working_dir = "./" + name(); + + _stub = new replica_stub(); + _stub->initialize(opts, system::config()); + _stub->open_service(); + return ERR_SUCCESS; +} + +void replication_service_app::stop(bool cleanup) +{ + if (_stub != nullptr) + { + _stub->close(); + _stub = nullptr; + } +} + +}} diff --git a/src/apps/replication/meta_server/CMakeLists.txt b/src/apps/replication/meta_server/CMakeLists.txt new file mode 100644 index 0000000000..73dadc8557 --- /dev/null +++ b/src/apps/replication/meta_server/CMakeLists.txt @@ -0,0 +1,2 @@ +include_directories(AFTER ../client_lib ../../../dist/failure_detector) +dsn_add_library(dsn.replication.meta_server) diff --git a/src/apps/replication/meta_server/load_balancer.cpp b/src/apps/replication/meta_server/load_balancer.cpp new file mode 100644 index 0000000000..2ebd2b0e52 --- /dev/null +++ 
b/src/apps/replication/meta_server/load_balancer.cpp @@ -0,0 +1,170 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include "load_balancer.h" +#include + +bool MachineLoadComp(const std::pair& l, const std::pair& r) +{ + return l.second < r.second; +} + +load_balancer::load_balancer(server_state* state) +: _state(state), serverlet("load_balancer") +{ +} + +load_balancer::~load_balancer() +{ +} + +void load_balancer::run() +{ + zauto_read_lock l(_state->_lock); + + for (size_t i = 0; i < _state->_apps.size(); i++) + { + app_state& app = _state->_apps[i]; + + for (int j = 0; j < app.partition_count; j++) + { + partition_configuration& pc = app.partitions[j]; + run_lb(pc); + } + } +} + +void load_balancer::run(global_partition_id gpid) +{ + zauto_read_lock l(_state->_lock); + partition_configuration& pc = _state->_apps[gpid.app_id - 1].partitions[gpid.pidx]; + run_lb(pc); +} + +end_point load_balancer::find_minimal_load_machine(bool primaryOnly) +{ + std::vector> stats; + + for (auto it = _state->_nodes.begin(); it != _state->_nodes.end(); it++) + { + if (it->second.is_alive) + { + stats.push_back(std::make_pair(it->first, static_cast(primaryOnly ? 
it->second.primaries.size() + : it->second.partitions.size()))); + } + } + + + std::sort(stats.begin(), stats.end(), [](const std::pair& l, const std::pair& r) + { + return l.second < r.second; + }); + + //std::sort(stats.begin(), stats.end(), MachineLoadComp); + + if (stats.empty()) + { + return end_point::INVALID; + } + + int candidateCount = 1; + int val = stats[0].second; + + for (size_t i = 1; i < stats.size(); i++) + { + if (stats[i].second > val) + break; + candidateCount++; + } + + return stats[env::random32(0, candidateCount - 1)].first; +} + +void load_balancer::run_lb(partition_configuration& pc) +{ + configuration_update_request proposal; + proposal.config = pc; + + if (pc.primary == end_point::INVALID) + { + proposal.type = CT_ASSIGN_PRIMARY; + if (pc.secondaries.size() > 0) + { + proposal.node = pc.secondaries[env::random32(0, static_cast(pc.secondaries.size()) - 1)]; + } + else + { + proposal.node = find_minimal_load_machine(true); + } + + if (proposal.node != end_point::INVALID) + { + send_proposal(proposal.node, proposal); + } + } + + else if (static_cast(pc.secondaries.size()) + 1 < pc.max_replica_count) + { + proposal.type = CT_ADD_SECONDARY; + proposal.node = find_minimal_load_machine(false); + if (proposal.node != end_point::INVALID) + { + send_proposal(pc.primary, proposal); + } + } + else + { + // it is healthy, nothing to do + } +} + +// meta server => partition server +void load_balancer::send_proposal(const end_point& node, const configuration_update_request& proposal) +{ + rpc::call_one_way_typed(node, RPC_CONFIG_PROPOSAL, proposal, gpid_to_hash(proposal.config.gpid)); +} + +void load_balancer::query_decree(std::shared_ptr query) +{ + rpc::call_typed(query->node, RPC_QUERY_PN_DECREE, query, this, &load_balancer::on_query_decree_ack, gpid_to_hash(query->gpid), 3000); +} + +void load_balancer::on_query_decree_ack(error_code err, std::shared_ptr& query, std::shared_ptr& resp) +{ + if (err) + { + tasking::enqueue(LPC_QUERY_PN_DECREE, this, 
std::bind(&load_balancer::query_decree, this, query), 0, 1000); + } + else + { + zauto_write_lock l(_state->_lock); + app_state& app = _state->_apps[query->gpid.app_id - 1]; + partition_configuration& ps = app.partitions[query->gpid.pidx]; + if (resp->last_decree > ps.last_committed_decree) + { + ps.last_committed_decree = resp->last_decree; + } + } +} diff --git a/src/apps/replication/meta_server/load_balancer.h b/src/apps/replication/meta_server/load_balancer.h new file mode 100644 index 0000000000..408d042b20 --- /dev/null +++ b/src/apps/replication/meta_server/load_balancer.h @@ -0,0 +1,55 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */
+#pragma once
+
+#include "server_state.h"
+
+using namespace dsn;
+using namespace dsn::service;
+using namespace dsn::replication;
+// Meta-server component that sends configuration proposals (assign primary / add secondary) for partitions tracked in server_state; see load_balancer.cpp.
+class load_balancer : public serverlet
+{
+public:
+    load_balancer(server_state* state);
+    ~load_balancer();
+
+    void run(); // called from meta_service::on_load_balance_timer
+    void run(global_partition_id gpid); // called from meta_service::on_config_changed for the changed partition
+
+private:
+    // meta server => partition server
+    void send_proposal(const end_point& node, const configuration_update_request& proposal); // one-way RPC_CONFIG_PROPOSAL
+    void query_decree(std::shared_ptr query); // asks query->node via RPC_QUERY_PN_DECREE
+    void on_query_decree_ack(error_code err, std::shared_ptr& query, std::shared_ptr& resp); // reply callback of query_decree
+
+    void run_lb(partition_configuration& pc); // proposes at most one change for a single partition
+    end_point find_minimal_load_machine(bool primaryOnly); // returns end_point::INVALID when there is no candidate
+
+private:
+    server_state *_state; // server_state passed in by meta_service::start. NOTE(review): presumably not owned - confirm in the destructor
+};
+
diff --git a/src/apps/replication/meta_server/meta_server_failure_detector.cpp b/src/apps/replication/meta_server/meta_server_failure_detector.cpp
new file mode 100644
index 0000000000..e2b68734b0
--- /dev/null
+++ b/src/apps/replication/meta_server/meta_server_failure_detector.cpp
@@ -0,0 +1,133 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015 Microsoft Corporation
+ *
+ * -=- Robust Distributed System Nucleus (rDSN) -=-
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "meta_server_failure_detector.h"
+#include "server_state.h"
+
+#define __TITLE__ "MetaServer.FD"
+// Binds the detector to the shared server_state; it starts in non-primary mode.
+meta_server_failure_detector::meta_server_failure_detector(server_state* state)
+{
+    _state = state;
+    _is_primary = false;
+}
+
+meta_server_failure_detector::~meta_server_failure_detector(void)
+{
+}
+// Primary only: marks every expired worker in `nodes` as not alive in server_state and logs each one.
+void meta_server_failure_detector::on_worker_disconnected(const std::vector& nodes)
+{
+    if (!is_primary())
+    {
+        return;
+    }
+
+    node_states states;
+    for (auto& n : nodes)
+    {
+        states.push_back(std::make_pair(n, false));
+
+        dwarn("client expired: %s:%hu", n.name.c_str(), n.port);
+    }
+
+    _state->set_node_state(states);
+}
+// Primary only: marks `node` as alive in server_state and logs the reconnect.
+void meta_server_failure_detector::on_worker_connected(const end_point& node)
+{
+    if (!is_primary())
+    {
+        return;
+    }
+
+    node_states states;
+    states.push_back(std::make_pair(node, true));
+
+    dwarn("client reconnected: %s:%hu", // the format string comes first, as in on_worker_disconnected
+        node.name.c_str(), node.port);
+
+    _state->set_node_state(states);
+}
+// Switches role. Becoming primary registers every node known to server_state as a worker; leaving primary clears all workers. Always returns true.
+bool meta_server_failure_detector::set_primary(bool is_primary /*= false*/)
+{
+    bool bRet = true;
+    if (is_primary && !_is_primary)
+    {
+        node_states ns;
+        _state->get_node_state(ns);
+
+        for (auto& pr : ns)
+        {
+            register_worker(pr.first, pr.second); // pr = (address, is_alive)
+        }
+
+        _is_primary = true;
+    }
+
+    if (!is_primary && _is_primary)
+    {
+        clear_workers();
+        _is_primary = false;
+    }
+
+    return bRet;
+}
+// Returns the role last set through set_primary().
+bool meta_server_failure_detector::is_primary() const
+{
+    return _is_primary;
+}
+// Answers a beacon. A non-primary replies is_master = false with the primary known to server_state (or INVALID); the primary delegates to failure_detector::on_ping_internal.
+void meta_server_failure_detector::on_ping(const fd::beacon_msg& beacon, ::dsn::service::rpc_replier& reply)
+{
+    fd::beacon_ack ack;
+    ack.this_node = beacon.to;
+    if (!is_primary())
+    {
+        end_point master;
+        if (_state->get_meta_server_primary(master))
+        {
+            ack.time = beacon.time;
+            ack.is_master = false;
+            ack.primary_node = master; // redirect the sender to the current primary
+        }
+        else
+        {
+            ack.time = beacon.time;
+            ack.is_master = false;
+            ack.primary_node = end_point::INVALID; // no primary is known
+        }
+    }
+    else
+    {
+        failure_detector::on_ping_internal(beacon, ack);
+        ack.primary_node = primary_address();
+    }
+
+    reply(ack);
+}
+
diff --git a/src/apps/replication/meta_server/meta_server_failure_detector.h b/src/apps/replication/meta_server/meta_server_failure_detector.h
new file mode 100644
index 0000000000..8738604a3f
--- /dev/null
+++ b/src/apps/replication/meta_server/meta_server_failure_detector.h
@@ -0,0 +1,67 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015 Microsoft Corporation
+ *
+ * -=- Robust Distributed System Nucleus (rDSN) -=-
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#pragma once
+
+# include
+#include "replication_common.h"
+
+using namespace dsn;
+using namespace dsn::service;
+using namespace dsn::replication;
+using namespace dsn::fd;
+// Failure detector run by the meta server: the worker-facing (server side) callbacks are implemented in the .cpp, the master-facing (client side) ones assert.
+class server_state;
+class meta_server_failure_detector : public failure_detector
+{
+public:
+    meta_server_failure_detector(server_state* state);
+    ~meta_server_failure_detector(void);
+
+    virtual bool set_primary(bool is_primary = false); // switches between primary and non-primary role
+    bool is_primary() const;
+
+    // client side
+    virtual void on_master_disconnected(const std::vector& nodes)
+    {
+        dassert (false, "unsupported method"); // the meta server never acts as a failure-detector client
+    }
+
+    virtual void on_master_connected(const end_point& node)
+    {
+        dassert (false, "unsupported method");
+    }
+
+    // server side
+    virtual void on_worker_disconnected(const std::vector& nodes); // marks the nodes as not alive in server_state (primary only)
+    virtual void on_worker_connected(const end_point& node); // marks the node as alive in server_state (primary only)
+
+    virtual void on_ping(const fd::beacon_msg& beacon, ::dsn::service::rpc_replier& reply); // beacon handler; non-primaries answer with the known primary
+
+private:
+    bool _is_primary; // role last set through set_primary()
+    server_state *_state; // shared meta-server state; the destructor in the .cpp is empty, so it is not deleted here
+};
+
diff --git a/src/apps/replication/meta_server/meta_service.cpp b/src/apps/replication/meta_server/meta_service.cpp
new file mode 100644
index 0000000000..152a7ebe03
--- /dev/null
+++ b/src/apps/replication/meta_server/meta_service.cpp
@@ -0,0 +1,299 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015 Microsoft Corporation
+ *
+ * -=- Robust Distributed System Nucleus (rDSN) -=-
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "meta_service.h"
+#include "server_state.h"
+#include "load_balancer.h"
+#include "meta_server_failure_detector.h"
+#include
+#include
+// Sets all members to inert defaults and loads the replication options from the system configuration; nothing runs until start().
+meta_service::meta_service(server_state* state)
+: _state(state), serverlet("meta_service")
+{
+    _balancer = nullptr;
+    _failure_detector = nullptr;
+    _log = static_cast(0);
+    _offset = 0;
+    _data_dir = ".";
+    _started = false;
+
+    _opts.initialize(system::config());
+}
+
+meta_service::~meta_service(void)
+{
+}
+// Restores (or, with clean_state, discards) the state persisted under data_dir, opens the operation log, creates balancer and failure detector, registers the RPC handler and starts failure detection.
+void meta_service::start(const char* data_dir, bool clean_state)
+{
+    _data_dir = data_dir;
+
+    if (clean_state)
+    {
+        try {
+            boost::filesystem::remove(_data_dir + "/checkpoint");
+            boost::filesystem::remove(_data_dir + "/oplog");
+        }
+        catch (std::exception& ex)
+        {
+            ex; // best effort: a failed removal is deliberately ignored
+        }
+    }
+    else
+    {
+        if (!boost::filesystem::exists(_data_dir))
+        {
+            boost::filesystem::create_directory(_data_dir);
+        }
+
+        if (boost::filesystem::exists(_data_dir + "/checkpoint"))
+        {
+            _state->load((_data_dir + "/checkpoint").c_str());
+        }
+
+        if (boost::filesystem::exists(_data_dir + "/oplog"))
+        {
+            replay_log((_data_dir + "/oplog").c_str()); // re-apply updates logged after the last checkpoint
+            _state->save((_data_dir + "/checkpoint").c_str()); // fold them into a fresh checkpoint
+            boost::filesystem::remove(_data_dir + "/oplog"); // so the new log starts empty, matching _offset == 0
+        }
+    }
+
+    _log = file::open((_data_dir + "/oplog").c_str(), O_RDWR | O_CREAT, 0666);
+
+    _balancer = new load_balancer(_state);
+    _failure_detector = new meta_server_failure_detector(_state);
+    _balancer_timer = tasking::enqueue(LPC_LBM_RUN, this, &meta_service::on_load_balance_timer, 0, 1000, 5000); // NOTE(review): presumably hash, delay(ms), interval(ms) - confirm
+
+    register_rpc_handler(RPC_CM_CALL, "RPC_CM_CALL", &meta_service::on_request);
+
+    end_point primary;
+    if (_state->get_meta_server_primary(primary) && primary == primary_address())
+    {
+        _failure_detector->set_primary(true);
+    }
+    else
+        _failure_detector->set_primary(false);
+
+
+    _failure_detector->start(
+        _opts.fd_check_interval_seconds,
+        _opts.fd_beacon_interval_seconds,
+        _opts.fd_lease_seconds,
+        _opts.fd_grace_seconds,
+        false
+        );
+
+    _started = true;
+}
+// Stops and deletes the failure detector and the balancer, cancels the timer and unregisters the RPC handler. Returns false if the service was not started.
+bool meta_service::stop()
+{
+    if (!_started) return false;
+    _started = false;
+    _failure_detector->stop();
+    delete _failure_detector;
+    _failure_detector = nullptr;
+
+    _balancer_timer->cancel(true);
+    unregister_rpc_handler(RPC_CM_CALL);
+    delete _balancer;
+    _balancer = nullptr;
+    return true;
+}
+// Single entry point for RPC_CM_CALL: a non-primary answers ERR_TALK_TO_OTHERS, the primary dispatches on hdr.rpc_tag.
+void meta_service::on_request(message_ptr& msg)
+{
+    meta_request_header hdr;
+    unmarshall(msg, hdr);
+
+    meta_response_header rhdr;
+    bool is_primary = _state->get_meta_server_primary(rhdr.primary_address);
+    if (is_primary) is_primary = (primary_address() == rhdr.primary_address);
+    rhdr.err = ERR_SUCCESS;
+
+    message_ptr resp = msg->create_response();
+    if (!is_primary)
+    {
+        rhdr.err = ERR_TALK_TO_OTHERS; // rhdr.primary_address already names the node to talk to, if any
+
+        marshall(resp, rhdr);
+    }
+    else if (hdr.rpc_tag == RPC_CM_QUERY_NODE_PARTITIONS)
+    {
+        configuration_query_by_node_request request;
+        configuration_query_by_node_response response;
+        unmarshall(msg, request);
+
+        query_configuration_by_node(request, response);
+
+        marshall(resp, rhdr);
+        marshall(resp, response);
+    }
+
+    else if (hdr.rpc_tag == RPC_CM_QUERY_PARTITION_CONFIG_BY_INDEX)
+    {
+        configuration_query_by_index_request request;
+        configuration_query_by_index_response response;
+        unmarshall(msg, request);
+
+        query_configuration_by_index(request, response);
+
+        marshall(resp, rhdr);
+        marshall(resp, response);
+    }
+
+    else if (hdr.rpc_tag == RPC_CM_UPDATE_PARTITION_CONFIGURATION)
+    {
+        update_configuration(msg, resp);
+        return; // the reply is sent from on_log_completed once the update is logged
+    }
+
+    else
+    {
+        dassert(false, "unknown rpc tag %x (%s)", hdr.rpc_tag, task_code(hdr.rpc_tag).to_string());
+    }
+
+    rpc::reply(resp);
+}
+
+// partition server & client => meta server
+void meta_service::query_configuration_by_node(configuration_query_by_node_request& request, __out_param configuration_query_by_node_response& response)
+{
+    _state->query_configuration_by_node(request, response);
+}
+
+void meta_service::query_configuration_by_index(configuration_query_by_index_request& request, __out_param configuration_query_by_index_response& response)
+{
+    _state->query_configuration_by_index(request, response);
+}
+// Re-applies every record of the operation log `log`; each record is an int32 length followed by a marshalled configuration_update_request.
+void meta_service::replay_log(const char* log)
+{
+    FILE* fp = ::fopen(log, "rb");
+    dassert (fp != nullptr, "open operation log %s failed, err = %d", log, errno);
+
+    char buffer[4096]; // enough for holding configuration_update_request
+    while (true)
+    {
+        int32_t len;
+        if (1 != ::fread((void*)&len, sizeof(int32_t), 1, fp))
+            break;
+
+        dassert(len > 0 && len <= 4096, "invalid log record length %d", len); // a negative length would reach fread as a huge size and overrun buffer
+        auto r = ::fread((void*)buffer, len, 1, fp);
+        dassert(r == 1, "log is corrupted");
+
+        blob bb(buffer, 0, len);
+        binary_reader reader(bb);
+
+        configuration_update_request request;
+        configuration_update_response response;
+        unmarshall(reader, request);
+        update_configuration(request, response);
+    }
+
+    ::fclose(fp);
+}
+// Appends the raw request, prefixed with its int32 length, to the operation log; the update itself is applied in on_log_completed.
+void meta_service::update_configuration(message_ptr req, message_ptr resp)
+{
+    auto bb = req->reader().get_remaining_buffer();
+    uint64_t offset;
+    int len = bb.length() + sizeof(int32_t);
+
+    char* buffer = (char*)malloc(len); // released in on_log_completed
+    *(int32_t*)buffer = bb.length();
+    memcpy(buffer + sizeof(int32_t), bb.data(), bb.length());
+
+    {
+
+        zauto_lock l(_log_lock);
+        offset = _offset;
+        _offset += len; // reserve this record's file range while holding the lock
+
+        file::write(_log, buffer, len, offset, LPC_CM_LOG_UPDATE, this,
+            std::bind(&meta_service::on_log_completed, this, std::placeholders::_1, std::placeholders::_2, buffer, req, resp));
+    }
+}
+// Completion callback of the log write: frees the staging buffer, applies the update and replies to the caller.
+void meta_service::on_log_completed(error_code err, int size, char* buffer, message_ptr req, message_ptr resp)
+{
+    free(buffer);
+    dassert(err == ERR_SUCCESS, "log operation failed, cannot proceed, err = %s", err.to_string());
+
+    configuration_update_request request;
+    configuration_update_response response;
+    unmarshall(req, request);
+
+    update_configuration(request, response);
+
+    meta_response_header rhdr;
+    rhdr.err = err;
+    rhdr.primary_address = primary_address();
+
+    marshall(resp, rhdr);
+    marshall(resp, response);
+
+    rpc::reply(resp);
+}
+// Applies the update to server_state and, once the service is started, schedules on_config_changed for the affected partition.
+void meta_service::update_configuration(configuration_update_request& request, __out_param configuration_update_response& response)
+{
+    _state->update_configuration(request, response);
+
+    if (_started) // false while start() replays the log, so replay does not trigger balancing
+        tasking::enqueue(LPC_LBM_RUN, this, std::bind(&meta_service::on_config_changed, this, request.config.gpid));
+}
+// Timer task: re-evaluates whether this node is the meta primary and, if so, calls _balancer->run().
+// local timers
+void meta_service::on_load_balance_timer()
+{
+    end_point primary;
+    if (_state->get_meta_server_primary(primary) && primary == primary_address())
+    {
+        _failure_detector->set_primary(true);
+        _balancer->run();
+    }
+    else
+    {
+        _failure_detector->set_primary(false);
+    }
+}
+// Same role check as on_load_balance_timer, but calls _balancer->run(gpid) for the partition that changed.
+void meta_service::on_config_changed(global_partition_id gpid)
+{
+    end_point primary;
+    if (_state->get_meta_server_primary(primary) && primary == primary_address())
+    {
+        _failure_detector->set_primary(true);
+        _balancer->run(gpid);
+    }
+    else
+    {
+        _failure_detector->set_primary(false);
+    }
+}
diff --git a/src/apps/replication/meta_server/meta_service.h b/src/apps/replication/meta_server/meta_service.h
new file mode 100644
index 0000000000..26b7660e7b
--- /dev/null
+++ b/src/apps/replication/meta_server/meta_service.h
@@ -0,0 +1,77 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015 Microsoft Corporation
+ *
+ * -=- Robust Distributed System Nucleus (rDSN) -=-
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify,
merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#pragma once
+
+#include "replication_common.h"
+
+using namespace dsn;
+using namespace dsn::service;
+using namespace dsn::replication;
+// RPC front end of the meta server: serves RPC_CM_CALL, writes the operation log, and creates/destroys the load balancer and the failure detector.
+class server_state;
+class load_balancer;
+class meta_server_failure_detector;
+class meta_service : public serverlet
+{
+public:
+    meta_service(server_state* state);
+    ~meta_service(void);
+
+    void start(const char* data_dir, bool clean_state); // clean_state = true removes the persisted checkpoint and oplog instead of loading them
+    bool stop(); // returns false when the service was not started
+
+private:
+    void on_request(message_ptr& request); // handler registered for RPC_CM_CALL
+    void replay_log(const char* log); // re-applies the updates stored in an operation log file
+
+    // partition server & client => meta server
+    void query_configuration_by_node(configuration_query_by_node_request& request, __out_param configuration_query_by_node_response& response);
+    void query_configuration_by_index(configuration_query_by_index_request& request, __out_param configuration_query_by_index_response& response);
+
+    // update configuration
+    void update_configuration(message_ptr req, message_ptr resp); // step 1: append the request to the oplog
+    void on_log_completed(error_code err, int size, char* buffer, message_ptr req, message_ptr resp); // step 2: log write finished, apply and reply
+    void update_configuration(configuration_update_request& request, __out_param configuration_update_response& response); // applies the update to server_state
+
+    // load balance actions
+    void on_load_balance_timer();
+    void on_config_changed(global_partition_id gpid);
+
+private:
+    friend class meta_server_failure_detector;
+    meta_server_failure_detector *_failure_detector; // created in start(), deleted in stop()
+    server_state *_state; // not deleted by meta_service (neither stop() nor the destructor touches it)
+    load_balancer *_balancer; // created in start(), deleted in stop()
+    task_ptr _balancer_timer; // task returned by tasking::enqueue for on_load_balance_timer
+    replication_options _opts;
+    std::string _data_dir; // directory holding "checkpoint" and "oplog"
+    bool _started;
+
+    zlock _log_lock; // guards _offset
+    handle_t _log; // handle of <_data_dir>/oplog opened in start()
+    uint64_t _offset; // offset at which the next log record is written
+};
+
diff --git a/src/apps/replication/meta_server/meta_service_app.cpp b/src/apps/replication/meta_server/meta_service_app.cpp
new file mode 100644
index 0000000000..843be92902
--- /dev/null
+++ b/src/apps/replication/meta_server/meta_service_app.cpp
@@ -0,0 +1,87 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015 Microsoft Corporation
+ *
+ * -=- Robust Distributed System Nucleus (rDSN) -=-
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include
+#include "server_state.h"
+#include "meta_service.h"
+
+namespace dsn {
+    namespace service {
+        // Static, hence shared by every meta_service_app instance: created by the first start(), deleted in stop() once no meta primary remains.
+        server_state * meta_service_app::_reliable_state = nullptr;
+
+        meta_service_app::meta_service_app(service_app_spec* s)
+            : service_app(s)
+        {
+            _service = nullptr; // created in start()
+        }
+
+        meta_service_app::~meta_service_app()
+        {
+
+        }
+        // Creates the meta_service on the shared server_state, registers this node as a meta server and starts the service; argc/argv are not used.
+        error_code meta_service_app::start(int argc, char** argv)
+        {
+            if (nullptr == _reliable_state)
+            {
+                _reliable_state = new server_state();
+            }
+
+            _service = new meta_service(_reliable_state);
+
+            auto cf = system::config();
+            _reliable_state->init_app(cf); // no-op when an app is already present in the shared state
+            _reliable_state->add_meta_node(_service->primary_address());
+            _service->start(name().c_str(), false); // the app name is used as data directory; clean_state = false
+            return ERR_SUCCESS;
+        }
+        // Stops and deletes the service and deregisters this node as a meta server; `cleanup` is not consulted.
+        void meta_service_app::stop(bool cleanup)
+        {
+            if (_reliable_state != nullptr)
+            {
+                if (_service != nullptr)
+                {
+                    _service->stop();
+                    _reliable_state->remove_meta_node(_service->primary_address());
+                    delete _service;
+                    _service = nullptr;
+
+                    end_point primary;
+                    if (!_reliable_state->get_meta_server_primary(primary)) // no leader left, i.e. the last meta server was removed
+                    {
+                        delete _reliable_state;
+                        _reliable_state = nullptr;
+                    }
+                }
+            }
+            else
+            {
+                dassert(_service == nullptr, "service must be null");
+            }
+        }
+    }
+}
diff --git a/src/apps/replication/meta_server/server_state.cpp b/src/apps/replication/meta_server/server_state.cpp
new file mode 100644
index 0000000000..90d54a0573
--- /dev/null
+++ b/src/apps/replication/meta_server/server_state.cpp
@@ -0,0 +1,360 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015 Microsoft Corporation
+ *
+ * -=- Robust Distributed System Nucleus (rDSN) -=-
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is +
* furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+# include "server_state.h"
+# include
+# include
+
+# define __TITLE__ "meta.server.state"
+// Serialises an app_state field by field; `pos` is accepted but not used.
+void marshall(binary_writer& writer, const app_state& val, uint16_t pos = 0xffff)
+{
+    marshall(writer, val.app_type);
+    marshall(writer, val.app_name);
+    marshall(writer, val.app_id);
+    marshall(writer, val.partition_count);
+    marshall(writer, val.partitions);
+}
+// Inverse of marshall(app_state): reads the fields back in the same order.
+void unmarshall(binary_reader& reader, __out_param app_state& val)
+{
+    unmarshall(reader, val.app_type);
+    unmarshall(reader, val.app_name);
+    unmarshall(reader, val.app_id);
+    unmarshall(reader, val.partition_count);
+    unmarshall(reader, val.partitions);
+}
+
+// Starts with no meta server registered, hence no leader (-1).
+server_state::server_state(void)
+{
+    _leader_index = -1;
+}
+
+server_state::~server_state(void)
+{
+}
+// Loads the app table from a checkpoint written by save() and rebuilds the per-node partition sets; every node found is marked alive.
+// NOTE(review): the results of fopen/fread/malloc are not checked here; meta_service::start only calls this after testing that the file exists.
+void server_state::load(const char* chk_point)
+{
+    FILE* fp = ::fopen(chk_point, "rb");
+
+    int32_t len;
+    ::fread((void*)&len, sizeof(int32_t), 1, fp);
+
+    std::shared_ptr buffer((char*)malloc(len), ::free); // malloc'd memory must be released with free, not the default delete
+    ::fread((void*)buffer.get(), len, 1, fp);
+
+    blob bb(buffer, 0, len);
+    binary_reader reader(bb);
+    unmarshall(reader, _apps);
+
+    ::fclose(fp);
+
+    dassert(_apps.size() == 1, "");
+    auto& app = _apps[0];
+    for (int i = 0; i < app.partition_count; i++)
+    {
+        auto& ps = app.partitions[i];
+
+        if (ps.primary != end_point::INVALID)
+        {
+            _nodes[ps.primary].primaries.insert(ps.gpid);
+            _nodes[ps.primary].partitions.insert(ps.gpid);
+        }
+
+        for (auto& ep : ps.secondaries)
+        {
+            if (ep != end_point::INVALID)
+            {
+                _nodes[ep].partitions.insert(ps.gpid); // book the replica on the secondary's own node, not on the primary
+            }
+        }
+    }
+
+    for (auto& node : _nodes)
+    {
+        node.second.address = node.first;
+        node.second.is_alive = true;
+    }
+}
+// Writes the marshalled app table to `chk_point` as an int32 length followed by the payload buffers.
+void server_state::save(const char* chk_point)
+{
+    binary_writer writer;
+    marshall(writer, _apps);
+
+    FILE* fp = ::fopen(chk_point, "wb+");
+
+    int32_t len = writer.total_size();
+    ::fwrite((const void*)&len, sizeof(len), 1, fp);
+
+    std::vector bbs;
+    writer.get_buffers(bbs);
+
+    for (auto& bb : bbs)
+    {
+        ::fwrite((const void*)bb.data(), bb.length(), 1, fp);
+    }
+
+    ::fclose(fp);
+}
+// Creates the single application (id 1) and its partition table from the [replication.app] config section; does nothing if an app already exists.
+void server_state::init_app(configuration_ptr& cf)
+{
+    zauto_write_lock l(_lock);
+    if (_apps.size() > 0)
+        return;
+
+    app_state app;
+    app.app_id = 1;
+    app.app_name = cf->get_string_value("replication.app", "app_name", "");
+    dassert(app.app_name.length() > 0, "'[replication.app] app_name' not specified");
+    app.app_type = cf->get_string_value("replication.app", "app_type", "");
+    dassert(app.app_type.length() > 0, "'[replication.app] app_type' not specified");
+    app.partition_count = cf->get_value("replication.app", "partition_count", 1);
+
+    int32_t max_replica_count = cf->get_value("replication.app", "max_replica_count", 3);
+    for (int i = 0; i < app.partition_count; i++)
+    {
+        partition_configuration ps;
+        ps.app_type = app.app_type;
+        ps.ballot = 0;
+        ps.gpid.app_id = app.app_id;
+        ps.gpid.pidx = i;
+        ps.last_committed_decree = 0;
+        ps.max_replica_count = max_replica_count;
+
+        app.partitions.push_back(ps);
+    }
+
+    _apps.push_back(app);
+}
+// Appends (address, is_alive) for every known node to `nodes`.
+void server_state::get_node_state(__out_param node_states& nodes)
+{
+    zauto_read_lock l(_lock);
+    for (auto it = _nodes.begin(); it != _nodes.end(); it++)
+    {
+        nodes.push_back(std::make_pair(it->first, it->second.is_alive));
+    }
+}
+// Applies the given liveness flags; nodes not seen before are added with empty partition sets.
+void server_state::set_node_state(const node_states& nodes)
+{
+    zauto_write_lock l(_lock);
+    for (auto& itr : nodes)
+    {
+        auto it = _nodes.find(itr.first);
+        if (it != _nodes.end())
+            it->second.is_alive = itr.second;
+        else
+        {
+            node_state n;
+            n.address = itr.first;
+            n.is_alive = itr.second;
+
+            _nodes[itr.first] = n;
+        }
+    }
+}
+// Writes the current leader to `node` and returns true, or returns false when there is no leader.
+bool server_state::get_meta_server_primary(__out_param end_point& node)
+{
+    zauto_read_lock l(_meta_lock);
+    if (-1 == _leader_index)
+        return false;
+    else
+    {
+        node = _meta_servers[_leader_index];
+        return true;
+    }
+}
+// Registers a meta server; the first one registered becomes the leader.
+void server_state::add_meta_node(const end_point& node)
+{
+    zauto_write_lock l(_meta_lock);
+
+    _meta_servers.push_back(node);
+    if (1 == _meta_servers.size())
+        _leader_index = 0;
+}
+// Unregisters a meta server. The leader stays the same node unless the leader itself is removed, in which case a random survivor is chosen. Asserts if `node` is unknown.
+void server_state::remove_meta_node(const end_point& node)
+{
+    zauto_write_lock l(_meta_lock);
+
+    int i = -1;
+    for (auto it = _meta_servers.begin(); it != _meta_servers.end(); it++)
+    {
+        i++;
+        if (*it == node)
+        {
+            _meta_servers.erase(it);
+            if (_meta_servers.size() == 0)
+                _leader_index = -1;
+            else if (i < _leader_index) --_leader_index; // the erase shifted the leader one slot down; without this the index goes stale or out of range
+            else if (i == _leader_index)
+            {
+                _leader_index = env::random32(0, (uint32_t)_meta_servers.size() - 1);
+            }
+            return;
+        }
+    }
+
+    dassert (false, "cannot find node '%s:%d' in server state", node.name.c_str(), static_cast(node.port));
+}
+// Moves leadership to a different, randomly chosen meta server; does nothing with fewer than two servers.
+void server_state::switch_meta_primary()
+{
+    zauto_write_lock l(_meta_lock);
+    if (_meta_servers.size() <= 1) // also covers the empty list, where size() - 1 would wrap around
+        return;
+
+    while (true)
+    {
+        int r = env::random32(0, (uint32_t)_meta_servers.size() - 1);
+        if (r != _leader_index)
+        {
+            _leader_index = r;
+            return;
+        }
+    }
+}
+// Returns the configurations of all partitions booked on request.node, or ERR_OBJECT_NOT_FOUND for an unknown node.
+// partition server & client => meta server
+void server_state::query_configuration_by_node(configuration_query_by_node_request& request, __out_param configuration_query_by_node_response& response)
+{
+    zauto_read_lock l(_lock);
+    auto it = _nodes.find(request.node);
+    if (it == _nodes.end())
+    {
+        response.err = ERR_OBJECT_NOT_FOUND;
+    }
+    else
+    {
+        response.err = ERR_SUCCESS;
+
+        for (auto& p : it->second.partitions)
+        {
+            response.partitions.push_back(_apps[p.app_id - 1].partitions[p.pidx]);
+        }
+    }
+}
+// Looks the app up by name and returns the requested partitions; indices that are not below partition_count are skipped without an error.
+void server_state::query_configuration_by_index(configuration_query_by_index_request& request, __out_param configuration_query_by_index_response& response)
+{
+    zauto_read_lock l(_lock);
+
+    for (size_t i = 0; i < _apps.size(); i++)
+    {
+        app_state& kv = _apps[i];
+        if (kv.app_name == request.app_name)
+        {
+            response.err = ERR_SUCCESS;
+            app_state& app = kv;
+            for (auto& idx : request.partition_indices)
+            {
+                if (idx < app.partition_count)
+                {
+                    response.partitions.push_back(app.partitions[idx]);
+                }
+            }
+            return;
+        }
+    }
+
+    response.err = ERR_OBJECT_NOT_FOUND;
+}
+// Accepts the new configuration only if its ballot is exactly the stored ballot + 1 and then updates the bookkeeping of request.node according to request.type; otherwise answers ERR_INVALID_VERSION with the stored configuration.
+void server_state::update_configuration(configuration_update_request& request, __out_param configuration_update_response& response)
+{
+    zauto_write_lock l(_lock);
+
+    app_state& app = _apps[request.config.gpid.app_id - 1]; // NOTE(review): app_id and pidx come from the request and are not range-checked
+    partition_configuration& old = app.partitions[request.config.gpid.pidx];
+    if (old.ballot + 1 == request.config.ballot)
+    {
+        response.err = ERR_SUCCESS;
+
+        // update to new config
+        old = request.config;
+        response.config = request.config;
+
+        node_state& node = _nodes[request.node]; // operator[] creates the entry if the node was unknown
+
+        const char* type = "unknown";
+        switch (request.type)
+        {
+        case CT_ASSIGN_PRIMARY:
+            node.partitions.insert(old.gpid);
+            node.primaries.insert(old.gpid);
+            type = "assign primary";
+            break;
+        case CT_ADD_SECONDARY:
+            node.partitions.insert(old.gpid);
+            type = "add secondary";
+            break;
+        case CT_DOWNGRADE_TO_SECONDARY:
+            node.primaries.erase(old.gpid);
+            type = "downgrade to secondary";
+            break;
+        case CT_DOWNGRADE_TO_INACTIVE:
+        case CT_REMOVE:
+            node.partitions.erase(old.gpid);
+            node.primaries.erase(old.gpid);
+            type = request.type == CT_REMOVE ? "remove" : "downgrade to inactive";
+            break;
+        case CT_UPGRADE_TO_SECONDARY:
+            node.partitions.insert(old.gpid);
+            type = "upgrade to secondary";
+            break;
+        default:
+            dassert(false, "invalid config type %x", static_cast(request.type));
+        }
+
+        std::stringstream cf; // human-readable dump of the new configuration for the log line below
+        cf << "{primary:" << request.config.primary.name << ":" << request.config.primary.port << ", secondaries = [";
+        for (auto& s : request.config.secondaries)
+        {
+            cf << s.name << ":" << s.port << ",";
+        }
+        cf << "]}";
+
+        ddebug("%d.%d metaupdateok to ballot %lld, type = %s, config = %s",
+            request.config.gpid.app_id,
+            request.config.gpid.pidx,
+            request.config.ballot,
+            type,
+            cf.str().c_str()
+            );
+    }
+    else
+    {
+        response.err = ERR_INVALID_VERSION;
+        response.config = old;
+    }
+}
diff --git a/src/apps/replication/meta_server/server_state.h b/src/apps/replication/meta_server/server_state.h
new file mode 100644
index 0000000000..1684fd90ed
--- /dev/null
+++ b/src/apps/replication/meta_server/server_state.h
@@ -0,0 +1,89 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015 Microsoft Corporation
+ *
+ * -=- Robust Distributed System Nucleus (rDSN) -=-
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#pragma once
+
+#include "replication_common.h"
+#include
+
+using namespace dsn;
+using namespace dsn::service;
+using namespace dsn::replication;
+// List of (node address, is_alive) pairs, as filled by get_node_state() and consumed by set_node_state().
+typedef std::list> node_states;
+// One replicated application and the current configuration of each of its partitions.
+struct app_state
+{
+    std::string app_type;
+    std::string app_name;
+    int32_t app_id; // 1-based: server_state indexes _apps with app_id - 1
+    int32_t partition_count;
+    std::vector partitions; // indexed by gpid.pidx
+};
+// In-memory state of the meta server: the app/partition table and per-node bookkeeping (under _lock) plus the meta-server group and its leader (under _meta_lock).
+class server_state
+{
+public:
+    server_state(void);
+    ~server_state(void);
+
+    void init_app(configuration_ptr& cf); // creates the app from the [replication.app] section unless one already exists
+
+    void get_node_state(__out_param node_states& nodes);
+    void set_node_state(const node_states& nodes);
+    bool get_meta_server_primary(__out_param end_point& node); // false when no leader exists
+
+    void add_meta_node(const end_point& node);
+    void remove_meta_node(const end_point& node);
+    void switch_meta_primary();
+
+    void load(const char* chk_point); // reads the app table from a checkpoint file
+    void save(const char* chk_point); // writes the app table to a checkpoint file
+
+    // partition server & client => meta server
+    void query_configuration_by_node(configuration_query_by_node_request& request, __out_param configuration_query_by_node_response& response);
+    void query_configuration_by_index(configuration_query_by_index_request& request, __out_param configuration_query_by_index_response& response);
+    void update_configuration(configuration_update_request& request, __out_param configuration_update_response& response);
+
+private:
+    struct node_state
+    {
+        bool is_alive;
+        end_point address;
+        std::set primaries; // partitions for which this node is primary
+        std::set partitions; // all partitions booked on this node
+    };
+
+    zrwlock _lock; // guards _nodes and _apps
+    std::map _nodes; // keyed by node address
+    std::vector _apps;
+
+    zrwlock _meta_lock; // guards _meta_servers and _leader_index
+    std::vector _meta_servers;
+    int _leader_index; // index into _meta_servers, or -1 when there is no leader
+
+    friend class load_balancer; // load_balancer reads _lock/_apps directly
+};
+
diff --git a/src/apps/replication/replication.thrift b/src/apps/replication/replication.thrift
new file mode 100644
index 0000000000..536492d2a4
---
/dev/null +++ b/src/apps/replication/replication.thrift @@ -0,0 +1,260 @@ + +include "../../dsn.thrift" + +namespace cpp dsn.replication + +struct global_partition_id +{ + 1:i32 app_id = -1; + 2:i32 pidx = -1; +} + +struct mutation_header +{ + 1:global_partition_id gpid; + 2:i64 ballot; + 3:i64 decree; + 4:i64 log_offset; + 5:i64 last_committed_decree; +} + +struct mutation_data +{ + 1:mutation_header header; + 2:list updates; +} + +enum partition_status +{ + PS_INACTIVE, + PS_ERROR, + PS_PRIMARY, + PS_SECONDARY, + PS_POTENTIAL_SECONDARY, + PS_INVALID, +} + +struct partition_configuration +{ + 1:string app_type; + 2:global_partition_id gpid; + 3:i64 ballot; + 4:i32 max_replica_count; + 5:dsn.end_point primary; + 6:list secondaries; + 7:list drop_outs; + 8:i64 last_committed_decree; +} + +struct replica_configuration +{ + 1:global_partition_id gpid; + 2:i64 ballot; + 3:dsn.end_point primary; + 4:partition_status status = partition_status.PS_INACTIVE; +} + +struct prepare_msg +{ + 1:replica_configuration config; + 2:mutation_data mu; +} + +enum read_semantic_t +{ + ReadLastUpdate, + ReadOutdated, + ReadSnapshot +} + +struct read_request_header +{ + 1:global_partition_id gpid; + 2:i32 code; + 3:read_semantic_t semantic = read_semantic_t.ReadLastUpdate; + 4:i64 version_decree = -1; +} + +struct write_request_header +{ + 1:global_partition_id gpid; + 2:i32 code; +} + +struct rw_response_header +{ + 1:i32 err = 0; +} + +struct prepare_ack +{ + 1:global_partition_id gpid; + 2:i32 err; + 3:i64 ballot; + 4:i64 decree; + 5:i64 last_committed_decree_in_app; + 6:i64 last_committed_decree_in_prepare_list; +} + +struct learn_state +{ + 1:dsn.blob meta; + 2:list files; +} + +enum learner_status +{ + LearningWithoutPrepare, + LearningWithPrepare, + LearningSucceeded, + LearningFailed, + Learning_INVALID +} + +struct learn_request +{ + 1:global_partition_id gpid; + 2:dsn.end_point learner; + 3:i64 signature; + 4:i64 last_committed_decree_in_app; + 5:i64 
last_committed_decree_in_prepare_list; + 6:dsn.blob app_specific_learn_request; +} + +struct learn_response +{ + 1:i32 err; + 2:replica_configuration config; + 3:i64 commit_decree; + 4:i64 prepare_start_decree; + 5:learn_state state; + 6:string base_local_dir; +} + +struct group_check_request +{ + 1:string app_type; + 2:dsn.end_point node; + 3:replica_configuration config; + 4:i64 last_committed_decree; + 5:i64 learner_signature; +} + +struct group_check_response +{ + 1:global_partition_id gpid; + 2:i32 err; + 3:i64 last_committed_decree_in_app; + 4:i64 last_committed_decree_in_prepare_list; + 5:learner_status learner_status_ = learner_status.LearningFailed; + 6:i64 learner_signature; + 7:dsn.end_point node; +} + +/////////////////// meta server messages //////////////////// +enum config_type +{ + CT_NONE, + CT_ASSIGN_PRIMARY, + CT_ADD_SECONDARY, + CT_DOWNGRADE_TO_SECONDARY, + CT_DOWNGRADE_TO_INACTIVE, + CT_REMOVE, + + // not used by meta server + CT_UPGRADE_TO_SECONDARY, +} + +struct meta_request_header +{ + 1:i32 rpc_tag; +} + +struct meta_response_header +{ + 1:i32 err; + 2:dsn.end_point primary_address; +} + +// primary | secondary(upgrading) (w/ new config) => meta server +struct configuration_update_request +{ + 1:partition_configuration config; + 2:config_type type = config_type.CT_NONE; + 3:dsn.end_point node; +} + +// meta server (config mgr) => primary | secondary (downgrade) (w/ new config) +struct configuration_update_response +{ + 1:i32 err; + 2:partition_configuration config; +} + +// proposal: meta server(LBM) => primary (w/ current config) +struct configuration_proposal_request +{ + 1:partition_configuration config; + 2:config_type type = config_type.CT_NONE; + 3:dsn.end_point node; + 4:bool is_clean_data = false; + 5:bool is_upgrade = false; +} + +// client => meta server +struct configuration_query_by_node_request +{ + 1:dsn.end_point node; +} + +// meta server => client +struct configuration_query_by_node_response +{ + 1:i32 err; + 2:list 
partitions; +} + +struct configuration_query_by_index_request +{ + 1:string app_name; + 2:list partition_indices; +} + +struct configuration_query_by_index_response +{ + 1:i32 err; + 2:list partitions; +} + +struct query_replica_decree_request +{ + 1:global_partition_id gpid; + 2:dsn.end_point node; +} + +struct query_replica_decree_response +{ + 1:i32 err; + 2:i64 last_decree; +} + +service replica_s +{ + rw_response_header client_write(1:write_request_header req); + rw_response_header client_read(1:read_request_header req); + prepare_ack prepare(1:prepare_msg request); + void config_proposal(1:configuration_update_request proposal); + learn_response learn(1:learn_request request); + void learn_completion_notification(1:group_check_response report); + void add_learner(1:group_check_request request); + void remove(1:replica_configuration request); + group_check_response group_check(1:group_check_request request); + query_replica_decree_response query_decree(1:query_replica_decree_request req); +} + +service meta_s +{ + configuration_query_by_node_response query_configuration_by_node(1:configuration_query_by_node_request query); + configuration_query_by_index_response query_configuration_by_index(1:configuration_query_by_index_request query); + configuration_update_response update_configuration(1:configuration_update_request update); +} \ No newline at end of file diff --git a/src/cli/CMakeLists.txt b/src/cli/CMakeLists.txt new file mode 100644 index 0000000000..92e0ce6802 --- /dev/null +++ b/src/cli/CMakeLists.txt @@ -0,0 +1,3 @@ +#set(INPUT_LIBS dsn.failure_detector ${DSN_LIBS}) +set(BINPLACE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/config.ini") +dsn_add_executable(dsn.cli "${BINPLACE_FILES}") diff --git a/src/cli/cli.client.h b/src/cli/cli.client.h new file mode 100644 index 0000000000..a73e984958 --- /dev/null +++ b/src/cli/cli.client.h @@ -0,0 +1,111 @@ +# pragma once +# include +# include "cli.types.h" +# include + + +namespace dsn { + + 
DEFINE_TASK_CODE_RPC(RPC_DSN_CLI_CALL, TASK_PRIORITY_HIGH, THREAD_POOL_DEFAULT); + +class cli_client + : public virtual ::dsn::service::servicelet +{ +public: + cli_client(const ::dsn::end_point& server) { _server = server; } + cli_client() { _server = ::dsn::end_point::INVALID; } + virtual ~cli_client() {} + + + // ---------- call RPC_DSN_CLI_CALL ------------ + // - synchronous + ::dsn::error_code call( + const command& c, + __out_param std::string& resp, + int timeout_milliseconds = 0, + int hash = 0, + const ::dsn::end_point *p_server_addr = nullptr) + { + ::dsn::message_ptr msg = ::dsn::message::create_request(RPC_DSN_CLI_CALL, timeout_milliseconds, hash); + marshall(msg->writer(), c); + auto resp_task = ::dsn::service::rpc::call(p_server_addr ? *p_server_addr : _server, msg, nullptr); + resp_task->wait(); + if (resp_task->error() == ::dsn::ERR_SUCCESS) + { + unmarshall(resp_task->get_response()->reader(), resp); + } + return resp_task->error(); + } + + // - asynchronous with on-stack command and std::string + ::dsn::rpc_response_task_ptr begin_call( + const command& c, + void* context = nullptr, + int timeout_milliseconds = 0, + int reply_hash = 0, + int request_hash = 0, + const ::dsn::end_point *p_server_addr = nullptr) + { + return ::dsn::service::rpc::call_typed( + p_server_addr ? 
*p_server_addr : _server, + RPC_DSN_CLI_CALL, + c, + this, + &cli_client::end_call, + context, + request_hash, + timeout_milliseconds, + reply_hash + ); + } + + virtual void end_call( + ::dsn::error_code err, + const std::string& resp, + void* context) + { + if (err != ::dsn::ERR_SUCCESS) std::cout << "reply RPC_DSN_CLI_CALL err : " << err.to_string() << std::endl; + else + { + std::cout << "reply RPC_DSN_CLI_CALL ok" << std::endl; + } + } + + // - asynchronous with on-heap std::shared_ptr and std::shared_ptr + ::dsn::rpc_response_task_ptr begin_call2( + std::shared_ptr& c, + int timeout_milliseconds = 0, + int reply_hash = 0, + int request_hash = 0, + const ::dsn::end_point *p_server_addr = nullptr) + { + return ::dsn::service::rpc::call_typed( + p_server_addr ? *p_server_addr : _server, + RPC_DSN_CLI_CALL, + c, + this, + &cli_client::end_call2, + request_hash, + timeout_milliseconds, + reply_hash + ); + } + + virtual void end_call2( + ::dsn::error_code err, + std::shared_ptr& c, + std::shared_ptr& resp) + { + if (err != ::dsn::ERR_SUCCESS) std::cout << "reply RPC_DSN_CLI_CALL err : " << err.to_string() << std::endl; + else + { + std::cout << "reply RPC_DSN_CLI_CALL ok" << std::endl; + } + } + + +private: + ::dsn::end_point _server; +}; + +} \ No newline at end of file diff --git a/src/cli/cli.main.cpp b/src/cli/cli.main.cpp new file mode 100644 index 0000000000..87ba242d10 --- /dev/null +++ b/src/cli/cli.main.cpp @@ -0,0 +1,31 @@ +// apps +# include "cli_app.h" + +// tools +# include +# include +# include +# include +# include + +int main(int argc, char** argv) +{ + // register all possible service apps + dsn::service::system::register_service<::dsn::service::cli>("cli"); + + // register all possible tools and toollets + dsn::tools::register_tool("nativerun"); + dsn::tools::register_tool("simulator"); + dsn::tools::register_toollet("tracer"); + dsn::tools::register_toollet("profiler"); + dsn::tools::register_toollet("fault_injector"); + + // register necessary 
components +#ifdef DSN_NOT_USE_DEFAULT_SERIALIZATION + dsn::tools::register_component_provider("thrift"); +#endif + + // specify what services and tools will run in config file, then run + dsn::service::system::run("config.ini", true); + return 0; +} diff --git a/src/cli/cli.thrift b/src/cli/cli.thrift new file mode 100644 index 0000000000..8aff2b9397 --- /dev/null +++ b/src/cli/cli.thrift @@ -0,0 +1,12 @@ +namespace cpp dsn + +struct command +{ + 1:string cmd; + 2:list arguments; +} + +service cli +{ + string call(1:command c); +} diff --git a/src/cli/cli.types.h b/src/cli/cli.types.h new file mode 100644 index 0000000000..6d7345df26 --- /dev/null +++ b/src/cli/cli.types.h @@ -0,0 +1,63 @@ +# pragma once + +// +// uncomment the following line if you want to use +// data encoding/decoding from the original tool instead of rDSN +// in this case, you need to use these tools to generate +// type files with --gen=cpp etc. options +// +// !!! WARNING: not feasible for replicated service yet!!! 
+// +// # define DSN_NOT_USE_DEFAULT_SERIALIZATION + +# include + +# ifdef DSN_NOT_USE_DEFAULT_SERIALIZATION + +# include +# include "cli_types.h" + +namespace dsn { + // ---------- command ------------- + inline void marshall(::dsn::binary_writer& writer, const command& val) + { + boost::shared_ptr<::dsn::binary_writer_transport> transport(new ::dsn::binary_writer_transport(writer)); + ::apache::thrift::protocol::TBinaryProtocol proto(transport); + ::dsn::marshall_rpc_args(&proto, val, &command::write); + }; + + inline void unmarshall(::dsn::binary_reader& reader, __out_param command& val) + { + boost::shared_ptr<::dsn::binary_reader_transport> transport(new ::dsn::binary_reader_transport(reader)); + ::apache::thrift::protocol::TBinaryProtocol proto(transport); + ::dsn::unmarshall_rpc_args(&proto, val, &command::read); + }; + +} + + +# else // use rDSN's data encoding/decoding + +namespace dsn { + // ---------- command ------------- + struct command + { + std::string cmd; + std::vector< std::string> arguments; + }; + + inline void marshall(::dsn::binary_writer& writer, const command& val) + { + marshall(writer, val.cmd); + marshall(writer, val.arguments); + }; + + inline void unmarshall(::dsn::binary_reader& reader, __out_param command& val) + { + unmarshall(reader, val.cmd); + unmarshall(reader, val.arguments); + }; + +} + +#endif diff --git a/src/cli/cli_app.cpp b/src/cli/cli_app.cpp new file mode 100644 index 0000000000..f98d450c77 --- /dev/null +++ b/src/cli/cli_app.cpp @@ -0,0 +1,130 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the 
Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +# include "cli_app.h" +# include + +namespace dsn { + namespace service { + + cli::cli(service_app_spec* s) + : service_app(s) + { + _timeout_seconds = 10; // 10 seconds by default + } + + void usage() + { + std::cout << "------------ rcli commands ------" << std::endl; + std::cout << "help: show this message" << std::endl; + std::cout << "exit: exit the console" << std::endl; + std::cout << "remote: set cli target by 'remote %machine% %port% %timeout_seconds%" << std::endl; + std::cout << "all other commands are sent to remote target %machine%:%port%" << std::endl; + std::cout << "---------------------------------" << std::endl; + } + + error_code cli::start(int argc, char** argv) + { + + std::cout << "dsn remote cli begin ..." 
<< std::endl; + usage(); + + while (true) + { + std::string cmdline; + std::cout << ">"; + std::getline(std::cin, cmdline); + + std::string scmd = cmdline; + std::vector args; + + utils::split_args(scmd.c_str(), args, ' '); + + if (args.size() < 1) + continue; + + std::string cmd = args[0]; + if (cmd == "help") + { + usage(); + continue; + } + else if (cmd == "exit") + { + exit(0); + continue; + } + else if (cmd == "remote") + { + if (args.size() < 4) + { + std::cout << "invalid parameters for remote command, try help" << std::endl; + continue; + } + else + { + std::string machine = args[1]; + int port = atoi(args[2].c_str()); + _timeout_seconds = atoi(args[3].c_str()); + _target = end_point(machine.c_str(), port); + std::cout << "remote target is set to " << machine << ":" << port << ", timeout = " << _timeout_seconds << " seconds" < +# include "cli.client.h" + +namespace dsn { + namespace service { + + class cli : public service_app + { + public: + cli(service_app_spec* s); + virtual error_code start(int argc, char** argv); + virtual void stop(bool cleanup = false); + + private: + cli_client _client; + end_point _target; + int _timeout_seconds; + }; + } +} diff --git a/src/cli/config.ini b/src/cli/config.ini new file mode 100644 index 0000000000..c718e9aebe --- /dev/null +++ b/src/cli/config.ini @@ -0,0 +1,67 @@ +[apps.cli] +name = cli +type = cli +arguments = +run = true +count = 1 + +[core] + +;tool = simulator +tool = nativerun +;toollets = tracer, profiler +;fault_injector +pause_on_start = false +cli_local = false + +[tools.simulator] +random_seed = 2756568580 +use_given_random_seed = true + +[network] +; how many network threads for network library (used by asio) +io_service_worker_count = 2 + +[network.8101] +; channel = network_header_format, network_provider_name, buffer_block_size +;RPC_CHANNEL_TCP = NET_HDR_DSN, dsn::tools::asio_network_provider, 65536 + +;RPC_CHANNEL_TCP = NET_HDR_THRIFT, dsn::tools::asio_network_provider, 65536 + + +[task.default] 
+is_trace = true +is_profile = true +allow_inline = false +rpc_call_channel = RPC_CHANNEL_TCP +fast_execution_in_network_thread = false +rpc_message_header_format = dsn +rpc_timeout_milliseconds = 5000 + +[task.LPC_AIO_IMMEDIATE_CALLBACK] +is_trace = false +is_profile = false +allow_inline = false + +[task.LPC_RPC_TIMEOUT] +is_trace = false +is_profile = false + +; specification for each thread pool +[threadpool.default] + +[threadpool.THREAD_POOL_DEFAULT] +name = default +partitioned = false +worker_count = 2 +; max_input_queue_length = 1024 +worker_priority = THREAD_xPRIORITY_NORMAL + +; BoundedQueueAdmissionController MaxTaskQueueSize +; SingleRpcClassResponseTimeAdmissionController RpcRequestEventCode PercentileType(0-4) LatencyThreshold100ns(from task create to end in local process) +; counter percentile type (0-4): 999, 99, 95, 90, 50 +;admission_controller_factory_name = SingleRpcClassResponseTimeAdmissionController +;admission_controller_arguments = RPC_TEST 1 20000 + +;admission_controller_factory_name = BoundedQueueAdmissionController +;admission_controller_arguments = 100 diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt new file mode 100644 index 0000000000..d00e44c273 --- /dev/null +++ b/src/core/CMakeLists.txt @@ -0,0 +1,5 @@ +#set(proj_name dsn.core) +#file(GLOB proj_src *.cpp) +#add_library(${proj_name} STATIC ${proj_src}) +#install(TARGETS ${proj_name} DESTINATION lib) +dsn_add_library(dsn.core) diff --git a/src/core/admission_controller.cpp b/src/core/admission_controller.cpp new file mode 100644 index 0000000000..e0894ed8b6 --- /dev/null +++ b/src/core/admission_controller.cpp @@ -0,0 +1,130 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, 
including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# include +# include "task_engine.h" + +namespace dsn { + +// +////-------------------------- BoundedQueueAdmissionController -------------------------------------------------- +// +//// arguments: MaxTaskQueueSize +//BoundedQueueAdmissionController::BoundedQueueAdmissionController(task_queue* q, std::vector& sargs) +// : admission_controller(q, sargs) +//{ +// if (sargs.size() > 0) +// { +// _maxTaskQueueSize = atoi(sargs[0].c_str()); +// if (_maxTaskQueueSize <= 0) +// { +// dassert (false, "Invalid arguments for BoundedQueueAdmissionController: MaxTaskQueueSize = '%s'", sargs[0].c_str()); +// } +// } +// else +// { +// dassert (false, "arguments for BoundedQueueAdmissionController is missing: MaxTaskQueueSize"); +// } +//} +// +//BoundedQueueAdmissionController::~BoundedQueueAdmissionController(void) +//{ +//} +// +//bool BoundedQueueAdmissionController::is_task_accepted(task_ptr& task) +//{ +// if (InQueueTaskCount() < _maxTaskQueueSize || task->spec().pool->shared_same_worker_with_current_task(task)) +// { +// return true; +// } +// else +// { +// return false; +// } +//} +// +//int 
BoundedQueueAdmissionController::get_system_utilization() +//{ +// return static_cast(100.0 * static_castInQueueTaskCount() / static_cast_maxTaskQueueSize); +//} +// +////------------------------------ SingleRpcClassResponseTimeAdmissionController ---------------------------------------------------------------- +// +//// args: task_code PercentileType LatencyThreshold100ns(from task create to end in local process) +//// +// +//SingleRpcClassResponseTimeAdmissionController::SingleRpcClassResponseTimeAdmissionController(task_queue* q, std::vector& sargs) +// : admission_controller(q, sargs) +//{ +// if (sargs.size() >= 3) +// { +// _rpcCode = enum_from_string(sargs[0].c_str(), TASK_CODE_INVALID); +// _percentile = atoi(sargs[1].c_str()); +// _latencyThreshold100ns = atoi(sargs[2].c_str()); +// +// if (TASK_CODE_INVALID == _rpcCode || task_spec::get(_rpcCode).type != TASK_TYPE_RPC_REQUEST +// || _latencyThreshold100ns <= 0 +// || _percentile < 0 +// || _percentile >= 5 +// ) +// { +// dassert (false, "Invalid arguments for SingleRpcClassResponseTimeAdmissionController: RpcRequestEventCode PercentileType(0-4) LatencyThreshold100ns\n" +// "\tcounter percentile type (0-4): 999, 99, 95, 90, 50\n"); +// } +// +// _counter = task_spec::get(_rpcCode).rpc_server_latency_100ns; +// } +// else +// { +// dassert (false, "arguments for SingleRpcClassResponseTimeAdmissionController is missing: RpcRequestEventCode PercentileType(0-4) LatencyThreshold100ns\n" +// "\tcounter percentile type (0-4): 999, 99, 95, 90, 50\n"); +// } +//} +// +//SingleRpcClassResponseTimeAdmissionController::~SingleRpcClassResponseTimeAdmissionController(void) +//{ +//} +// +//bool SingleRpcClassResponseTimeAdmissionController::is_task_accepted(task_ptr& task) +//{ +// if (task->spec().type != TASK_TYPE_RPC_REQUEST +// //|| task->spec().code == _rpcCode +// || _counter->get_percentile(_percentile) < _latencyThreshold100ns +// ) +// { +// return true; +// } +// else +// { +// return false; +// } +//} +// 
+//int SingleRpcClassResponseTimeAdmissionController::get_system_utilization() +//{ +// return static_cast(100.0 * static_cast_counter->get_percentile(_percentile) / static_cast_latencyThreshold100ns); +//} + +} // end namespace diff --git a/src/core/aio_provider.cpp b/src/core/aio_provider.cpp new file mode 100644 index 0000000000..1a6279b0df --- /dev/null +++ b/src/core/aio_provider.cpp @@ -0,0 +1,47 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +# include +# include "disk_engine.h" +# include + +namespace dsn { + +aio_provider::aio_provider(disk_engine* disk, aio_provider* inner_provider) + : _engine(disk) +{ +} + +service_node* aio_provider::node() const +{ + return _engine->node(); +} + +void aio_provider::complete_io(aio_task_ptr& aio, error_code err, uint32_t bytes, int delay_milliseconds) +{ + _engine->complete_io(aio, err, bytes, delay_milliseconds); +} + +} // end namespace dsn diff --git a/src/core/command_manager.cpp b/src/core/command_manager.cpp new file mode 100644 index 0000000000..b0e685cf95 --- /dev/null +++ b/src/core/command_manager.cpp @@ -0,0 +1,246 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +# include "command_manager.h" +# include +# include +# include +# include +# include +# include +# include "service_engine.h" + +# define __TITLE__ "command_manager" + +namespace dsn { + + + void register_command( + const std::vector& commands, // commands, e.g., {"help", "Help", "HELP", "h", "H"} + const char* help, // help info for users + command_handler handler + ) + { + command_manager::instance().register_command(commands, help, handler); + } + + void register_command( + const char* command, // commands, e.g., "help" + const char* help, // help info for users + command_handler handler + ) + { + std::vector cmds; + cmds.push_back(command); + register_command(cmds, help, handler); + } + + void register_command( + const char** commands, // commands, e.g., {"help", "Help", nullptr} + const char* help, // help info for users + command_handler handler + ) + { + std::vector cmds; + while (*commands != nullptr) + { + cmds.push_back(*commands); + commands++; + } + + register_command(cmds, help, handler); + } + + void command_manager::register_command(const std::vector& commands, const char* help, command_handler handler) + { + utils::auto_write_lock l(_lock); + + for (auto cmd : commands) + { + if (cmd != nullptr) + { + auto it = _handlers.find(std::string(cmd)); + dassert(it == _handlers.end(), "command '%s' already regisered", cmd); + } + } + + command* c = new command; + c->help = help; + c->handler = handler; + + for (auto cmd : commands) + { + if (cmd != nullptr) + { + _handlers[std::string(cmd)] = c; + } + } + } + + bool command_manager::run_command(const std::string& cmdline, __out_param std::string& output) + { + std::string scmd = cmdline; + std::vector args; + + utils::split_args(scmd.c_str(), args, ' '); + + if (args.size() < 1) + return false; + + std::vector args2; + for (size_t i = 1; i < args.size(); i++) + { + args2.push_back(args[i]); + } + + return run_command(args[0], args2, output); + } + + bool command_manager::run_command(const std::string& 
cmd, const std::vector& args, __out_param std::string& output) + { + command* h = nullptr; + { + utils::auto_read_lock l(_lock); + auto it = _handlers.find(cmd); + if (it != _handlers.end()) + h = it->second; + } + + if (h == nullptr) + { + output = std::string("unknown command '") + cmd + "'"; + return false; + } + else + { + output = h->handler(args); + return true; + } + } + + void command_manager::run_console() + { + std::cout << "dsn cli begin ... (type 'help' + Enter to learn more)" << std::endl; + + while (true) + { + std::string cmdline; + std::cout << ">"; + std::getline(std::cin, cmdline); + + std::string result; + run_command(cmdline, result); + std::cout << result << std::endl; + } + } + + void command_manager::start_local_cli() + { + new std::thread(std::bind(&command_manager::run_console, this)); + } + + DEFINE_TASK_CODE_RPC(RPC_DSN_CLI_CALL, TASK_PRIORITY_HIGH, THREAD_POOL_DEFAULT); + + class cli_rpc_request_task : public rpc_request_task + { + public: + cli_rpc_request_task(message_ptr& request, service_node* node) + : rpc_request_task(request, node) + { + } + + virtual void exec() + { + command_manager::instance().on_remote_cli(get_request()); + } + }; + + class cli_rpc_server_handler : public rpc_server_handler + { + public: + virtual rpc_request_task_ptr new_request_task(message_ptr& request, service_node* node) + { + return rpc_request_task_ptr(new cli_rpc_request_task(request, node)); + } + }; + + void command_manager::start_remote_cli() + { + ::dsn::service_engine::instance().register_system_rpc_handler(RPC_DSN_CLI_CALL, "dsn.cli", new cli_rpc_server_handler()); + } + + void command_manager::on_remote_cli(message_ptr& request) + { + std::string cmd; + unmarshall(request->reader(), cmd); + + std::vector args; + unmarshall(request->reader(), args); + + std::string result; + run_command(cmd, args, result); + + auto resp = request->create_response(); + marshall(resp->writer(), result); + + ::dsn::service::rpc::reply(resp); + } + + 
command_manager::command_manager() + { + register_command( + {"help", "h", "H", "Help", nullptr}, + "help [command] - display help information", + [this](const std::vector& args) + { + std::stringstream ss; + + if (args.size() == 0) + { + utils::auto_read_lock l(_lock); + for (auto c : this->_handlers) + { + ss.width(6); + ss << std::left << c.first << ": " << c.second->help << std::endl; + } + } + else + { + utils::auto_read_lock l(_lock); + auto it = _handlers.find(args[0]); + if (it == _handlers.end()) + ss << "cannot find command '" << args[0] << "'" << std::endl; + else + { + ss.width(6); + ss << std::left << it->first << ": " << it->second->help << std::endl; + } + } + + return ss.str(); + } + ); + } +} diff --git a/src/core/command_manager.h b/src/core/command_manager.h new file mode 100644 index 0000000000..e21f21fa0d --- /dev/null +++ b/src/core/command_manager.h @@ -0,0 +1,62 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#pragma once + +# include +# include +# include +# include +# include + +namespace dsn { + + class command_manager : public ::dsn::utils::singleton + { + public: + command_manager(); + + void register_command(const std::vector& commands, const char* help, command_handler handler); + bool run_command(const std::string& cmdline, __out_param std::string& output); + void run_console(); + void start_local_cli(); + void start_remote_cli(); + void on_remote_cli(message_ptr& request); + + private: + bool run_command(const std::string& cmd, const std::vector& args, __out_param std::string& output); + + private: + struct command + { + const char* help; + command_handler handler; + }; + + ::dsn::utils::rw_lock _lock; + std::map _handlers; + }; + +} diff --git a/src/core/configuration.cpp b/src/core/configuration.cpp new file mode 100644 index 0000000000..bdd97df42a --- /dev/null +++ b/src/core/configuration.cpp @@ -0,0 +1,303 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# include +# include +# include +# include +# include + +namespace dsn { + +configuration::configuration(const char* file_name) +{ + _file_name = std::string(file_name); + + FILE* fd = ::fopen(file_name, "rb"); + if (fd == nullptr) + { + printf("Cannot open file %s, err=%s", file_name, strerror(errno)); + return; + } + ::fseek(fd, 0, SEEK_END); + int len = ftell(fd); + if (len == -1 || len == 0) + { + printf("Cannot get length of %s, err=%s", file_name, strerror(errno)); + ::fclose(fd); + return; + } + + int fileLength = len; + _file_data.reset((char*)malloc(len+1)); + char* fileData = _file_data.get(); + + ::fseek(fd, 0, SEEK_SET); + auto sz = ::fread(fileData, len, 1, fd); + ::fclose(fd); + if (sz != 1) + { + printf("Cannot read correct data of %s, err=%s", file_name, strerror(errno)); + return; + } + ((char*)fileData)[fileLength] = '\n'; + + // + // parse mapped file and build conf map + // + std::map* pSection = nullptr; + char *p, *pLine = (char*)"", *pNextLine, *pEnd, *pSectionName = nullptr, *pEqual; + int lineno = 0; + unsigned int indexInSection = 0; + + p = (char*)fileData; + pEnd = p + fileLength; + + while (p < pEnd) { + // + // get line + // + lineno++; + while (*p == ' ' || *p == '\t' || *p == '\r') p++; + + pLine = p; + int shift = 0; + while (*p != '\n' && p < pEnd) + { + if (*p == '#' || *p == ';') + { + if (p != pLine && *(p-1) == '^') + { + shift++; + } + else + { + *p = '\0'; + } + } + + if (shift > 0) + { + *(p-shift) = *p; + } + p++; + } + *(p-shift) = 
'\0'; + pNextLine = ++p; + + // + // parse line + // + p = pLine; + if (*p == '\0') goto Next; // skip comment line or empty line + pEqual = strchr(p, '='); + if (nullptr == pEqual && *p != '[') { + goto ConfReg; + } + if (nullptr != pEqual && *p == '[') + goto err; + + // + // conf + // + if (pEqual) + { +ConfReg: + if (pSection == nullptr) { + printf("configuration section not defined"); + goto err; + } + if (pEqual) *pEqual = '\0'; + char* pKey = utils::trim_string(p); + char* pValue = pEqual ? utils::trim_string(++pEqual) : nullptr; + if (*pKey == '\0') + goto err; + + if (pSection->find((const char*)pKey) != pSection->end()) + { + auto it = pSection->find((const char*)pKey); + + printf("Warning: skip redefinition of option [%s] %s (line %u), already defined as [%s] %s (line %u)\n", + pSectionName, + pKey, + lineno, + it->second.section, + it->second.key, + it->second.line + ); + } + else + { + conf cf; + cf.section = (const char*)pSectionName; + cf.key = (const char*)pKey; + cf.value = pValue; + cf.line = lineno; + pSection->insert(std::make_pair(std::string(pKey), cf)); + } + } + // + // section + // + else + { + char* pRight = strchr(p, ']'); + if (nullptr == pRight) + goto err; + *pRight = '\0'; + p++; + pSectionName = utils::trim_string(p); + if (*pSectionName == '\0') + goto err; + + if (has_section((const char*)pSectionName, false)) { + printf("RedefInition of section %s\n", pSectionName); + goto err; + } + + std::map sm; + auto it = _configs.insert(config_map::value_type(std::string(pSectionName), sm)); + assert (it.second); + pSection = &it.first->second; + indexInSection = 0; + } + + // + // iterate nextline + // +Next: + p = pNextLine; + } + return; + +err: + printf("Unexpected configure in %s(line %d): %s\n", file_name, lineno, pLine); + exit(-2); +} + +configuration::~configuration(void) +{ +} + +void configuration::get_all_sections(std::vector& sections) +{ + for (auto it = _configs.begin(); it != _configs.end(); it++) + { + 
sections.push_back(it->first); + } +} + +void configuration::get_all_keys(const char* section, std::vector& keys) +{ + auto it = _configs.find(section); + if (it != _configs.end()) + { + for (auto it2 = it->second.begin(); it2 != it->second.end(); it2++) + { + keys.push_back(it2->first); + } + } +} + +bool configuration::get_string_value_internal(const char* section, const char* key, const char* default_value, std::string& ov) +{ + auto it = _configs.find(section); + if (it != _configs.end()) + { + auto it2 = it->second.find(key); + if (it2 != it->second.end()) + { + ov = it2->second.value; + return true; + } + } + ov = default_value; + return false; +} + +std::string configuration::get_string_value(const char* section, const char* key, const char* default_value) +{ + std::string ov; + if (!get_string_value_internal(section, key, default_value, ov)) + { + printf("WARNING: configuration '[%s] %s' is not defined, default value is '%s'\n", + section, + key, + default_value + ); + } + return ov; +} + +std::list configuration::get_string_value_list(const char* section, const char* key, char splitter) +{ + std::string ov; + if (!get_string_value_internal(section, key, "", ov)) + { + printf("WARNING: configuration '[%s] %s' is not defined, default value is '%s'\n", + section, + key, + "" + ); + } + + std::list vs; + utils::split_args(ov.c_str(), vs, splitter); + + for (auto& v : vs) + { + v = std::string(utils::trim_string((char*)v.c_str())); + } + return vs; +} + +void configuration::register_config_change_notification(config_file_change_notifier notifier) +{ + dassert (false, "not implemented"); +} + +bool configuration::has_section(const char* section, bool warn_if_not) +{ + auto it = _configs.find(section); + bool r = (it != _configs.end()); + if (!r && warn_if_not) + { + printf("WARNING: configuration section '[%s]' is not defined, using default settings\n", section); + } + return r; +} + +bool configuration::has_key(const char* section, const char* key) +{ + auto it 
= _configs.find(section); + if (it != _configs.end()) + { + auto it2 = it->second.find(key); + return (it2 != it->second.end()); + } + return false; +} + +} diff --git a/src/core/coredump.posix.cpp b/src/core/coredump.posix.cpp new file mode 100644 index 0000000000..53bd652d7e --- /dev/null +++ b/src/core/coredump.posix.cpp @@ -0,0 +1,57 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +# include +# include + +//#ifdef _WIN32 +#if defined(__MACH__) || defined(__linux__) + + +# define __TITLE__ "coredump" + +namespace dsn { + namespace utils { + + static std::string s_dump_dir; + static char s_app_name[256] = "unknown"; + + void coredump::init(const char* dump_dir) + { + s_dump_dir = dump_dir; + + // TODO: not implemented + } + + void coredump::write() + { + // TODO: not implemented + // + } + } +} + +# endif // #if defined(__MACH__) || defined(__linux__) + diff --git a/src/core/coredump.win.cpp b/src/core/coredump.win.cpp new file mode 100644 index 0000000000..601af81199 --- /dev/null +++ b/src/core/coredump.win.cpp @@ -0,0 +1,156 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +# include +# include + +#ifdef _WIN32 + +# include +# include +# include +# include +# include +# pragma comment(lib, "PsApi.lib") + +# define __TITLE__ "coredump" + +namespace dsn { + namespace utils { + + static std::string s_dump_dir; + static char s_app_name[256] = "unknown"; + + static LONG WINAPI TopLevelFilter(struct _EXCEPTION_POINTERS *pExceptionInfo); + + void coredump::init(const char* dump_dir) + { + s_dump_dir = dump_dir; + + ::GetModuleBaseNameA(::GetCurrentProcess(), + ::GetModuleHandleA(NULL), + s_app_name, + 256 + ); + + ::SetUnhandledExceptionFilter(TopLevelFilter); + } + + void coredump::write() + { + TopLevelFilter(0); + } + + typedef BOOL(WINAPI *MINIDUMPWRITEDUMP)( + HANDLE hProcess, DWORD dwPid, HANDLE hFile, MINIDUMP_TYPE DumpType, + CONST PMINIDUMP_EXCEPTION_INFORMATION ExceptionParam, + CONST PMINIDUMP_USER_STREAM_INFORMATION UserStreamParam, + CONST PMINIDUMP_CALLBACK_INFORMATION CallbackParam + ); + + static LONG WINAPI TopLevelFilter(struct _EXCEPTION_POINTERS *pExceptionInfo) + { + LONG retval = EXCEPTION_CONTINUE_SEARCH; + HWND hParent = NULL; // find a better value for your app + + // firstly see if dbghelp.dll is around and has the function we need + // look next to the EXE first, as the one in System32 might be old + // (e.g. 
Windows 2000) + HMODULE hDll = NULL; + + if (hDll == NULL) + { + // load any version we can + hDll = ::LoadLibraryA("DBGHELP.DLL"); + } + + LPCSTR szResult = "core dump success"; + char szDumpPath[512]; + char szScratch[512]; + + dfatal("fatal exception, core dump started ..."); + + if (hDll) + { + MINIDUMPWRITEDUMP pDump = (MINIDUMPWRITEDUMP)::GetProcAddress(hDll, "MiniDumpWriteDump"); + if (pDump) + { + sprintf(szDumpPath, "%s\\%s_%d_%d.dmp", s_dump_dir.c_str(), s_app_name, ::GetCurrentProcessId(), time(NULL)); + + // create the file + HANDLE hFile = ::CreateFileA(szDumpPath, GENERIC_WRITE, FILE_SHARE_WRITE, NULL, CREATE_ALWAYS, + FILE_ATTRIBUTE_NORMAL, NULL); + + if (hFile != INVALID_HANDLE_VALUE) + { + _MINIDUMP_EXCEPTION_INFORMATION ExInfo; + + ExInfo.ThreadId = ::GetCurrentThreadId(); + ExInfo.ExceptionPointers = pExceptionInfo; + ExInfo.ClientPointers = NULL; + + // write the dump + BOOL bOK = pDump(GetCurrentProcess(), GetCurrentProcessId(), hFile, MiniDumpNormal, &ExInfo, NULL, NULL); + if (bOK) + { + sprintf(szScratch, "saved dump file to '%s'", szDumpPath); + szResult = szScratch; + retval = EXCEPTION_EXECUTE_HANDLER; + } + else + { + sprintf(szScratch, "failed to save dump file to '%s' (error %d)", szDumpPath, GetLastError()); + szResult = szScratch; + } + ::CloseHandle(hFile); + } + else + { + sprintf(szScratch, "failed to create dump file '%s' (error %d)", szDumpPath, GetLastError()); + szResult = szScratch; + } + } + else + { + szResult = "DBGHELP.DLL too old"; + } + } + else + { + szResult = "DBGHELP.DLL not found"; + } + + if (szResult) + { + derror("%s", szResult); + printf(szResult); + } + return retval; + } + + } +} + +# endif // _WIN32 diff --git a/src/core/crc.h b/src/core/crc.h new file mode 100644 index 0000000000..0d3c2da897 --- /dev/null +++ b/src/core/crc.h @@ -0,0 +1,486 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby 
granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +# pragma once + +# include + +namespace dsn { namespace utils { + +template struct crc_generator +{ + typedef uintxx_t uint; + static const uintxx_t MSB = ((uintxx_t) 1) << (8 * sizeof (uintxx_t) - 1); + static const uintxx_t POLY = uPoly; + static uintxx_t _crc_table[256]; + static uintxx_t _uX2N[64]; + + + // + // compute CRC + // + static + uintxx_t + compute ( + const void *pSrc, + size_t uSize, + uintxx_t uCrc + ) + { + const uint8_t *pData = (const uint8_t *) pSrc; + size_t uBytes; + + uCrc = ~uCrc; + + while (uSize > 15) + { + uBytes = 0x80000000u; + if (uBytes > uSize) + uBytes = uSize; + uSize -= uBytes; + + for (; uBytes > 15; uBytes -= 16, pData += 16) + { + uCrc = _crc_table[(uint8_t) (uCrc ^ pData[ 0])] ^ (uCrc >> 8); + uCrc = _crc_table[(uint8_t) (uCrc ^ pData[ 1])] ^ (uCrc >> 8); + uCrc = _crc_table[(uint8_t) (uCrc ^ pData[ 2])] ^ (uCrc >> 8); + uCrc = _crc_table[(uint8_t) (uCrc ^ pData[ 3])] ^ (uCrc >> 8); + uCrc = _crc_table[(uint8_t) (uCrc ^ pData[ 4])] ^ (uCrc >> 8); + uCrc = _crc_table[(uint8_t) (uCrc ^ pData[ 5])] ^ (uCrc >> 8); + uCrc = _crc_table[(uint8_t) (uCrc ^ pData[ 6])] ^ (uCrc >> 8); + uCrc = _crc_table[(uint8_t) (uCrc ^ pData[ 7])] ^ (uCrc >> 8); + uCrc = _crc_table[(uint8_t) (uCrc ^ pData[ 8])] ^ (uCrc >> 8); + uCrc = _crc_table[(uint8_t) (uCrc ^ pData[ 9])] ^ (uCrc >> 8); + uCrc = _crc_table[(uint8_t) (uCrc ^ pData[10])] ^ (uCrc >> 8); + uCrc = _crc_table[(uint8_t) (uCrc ^ pData[11])] ^ (uCrc >> 8); + uCrc = _crc_table[(uint8_t) (uCrc ^ pData[12])] ^ (uCrc >> 8); + uCrc = _crc_table[(uint8_t) (uCrc ^ pData[13])] ^ (uCrc >> 8); + uCrc = _crc_table[(uint8_t) (uCrc ^ pData[14])] ^ (uCrc >> 8); + uCrc = _crc_table[(uint8_t) (uCrc ^ pData[15])] ^ (uCrc >> 8); + } + + uSize += uBytes; + } + + for (uBytes = uSize; uBytes > 0; uBytes -= 1, pData += 1) + uCrc = _crc_table[(uint8_t) (uCrc ^ pData[0])] ^ (uCrc >> 8); + + uCrc = ~uCrc; + + return (uCrc); + }; + + + + // + // Returns (a * b) mod POLY. 
+ // "a" and "b" are represented in "reversed" order -- LSB is x**(XX-1) coefficient, MSB is x^0 coefficient. + // "POLY" is represented in the same manner except for omitted x**XX coefficient + // + static + uintxx_t + MulPoly ( + uintxx_t a, + uintxx_t b + ) + { + uintxx_t r; + + if (a == 0) + return (0); + + r = 0; + do + { + if (a & MSB) + r ^= b; + + if (b & 1) + b = (b >> 1) ^ POLY; + else + b >>= 1; + + a <<= 1; + } + while (a != 0); + + return (r); + }; + + + // + // Returns (x ** (8*uSize)) mod POLY + // + static + uintxx_t + ComputeX_N (uint64_t uSize) + { + size_t i; + uintxx_t r; + + r = MSB; // r = 1 + for (i = 0; uSize != 0; uSize >>= 1, i += 1) + { + if (uSize & 1) + r = MulPoly (r, _uX2N[i]); + } + + return (r); + }; + + + // + // Allows to change initial CRC value + // + static + uintxx_t + ConvertInitialCrc ( + uintxx_t uNew, + uintxx_t uOld, + uintxx_t uCrc, + size_t uSize + ) + { + // + // CRC (A, uSize, uCrc) = (uCrc * x**uSize + A * x**XX) mod POLY (let's forget about double NOTs of uCrc) + // + // we know uCrc(uOld) = (uOld * x**uSize + A * x**XX) mod POLY; we need to compute + // uCrc(uNew) = (uNew * x**uSize + A * x**XX) mod POLY + // + // uCrc(uNew) = uCrc(Old) + (uNew - uOld) * x**uSize) + // + + uNew ^= uOld; + uOld = ComputeX_N (uSize); + uOld = MulPoly (uOld, uNew); + uCrc ^= uOld; + + return (uCrc); + }; + + + // + // Given + // uFinalCrcA = ComputeCrc (A, uSizeA, uInitialCrcA) + // and + // uFinalCrcB = ComputeCrc (B, uSizeB, uInitialCrcB), + // compute CRC of concatenation of A and B + // uFinalCrcAB = ComputeCrc (AB, uSizeA + uSizeB, uInitialCrcAB) + // without touching A and B + // + // NB: uSizeA and/or uSizeB may be 0s (this trick may be used to "recompute" CRC for another initial value) + // + + static + uintxx_t + concatenate ( + uintxx_t uInitialCrcAB, + uintxx_t uInitialCrcA, + uintxx_t uFinalCrcA, + uint64_t uSizeA, + uintxx_t uInitialCrcB, + uintxx_t uFinalCrcB, + uint64_t uSizeB + ) + { + uintxx_t uX_nA, uX_nB, 
uFinalCrcAB; + + // + // Crc (X, uSizeX, uInitialCrcX) = ~(((~uInitialCrcX) * x**uSizeX + X * x**XX) mod POLY) + // + + // + // first, convert CRC's to canonical values getting rid of double bitwise NOT around uCrc + // + uInitialCrcAB = ~uInitialCrcAB; + uInitialCrcA = ~uInitialCrcA; + uFinalCrcA = ~uFinalCrcA; + uInitialCrcB = ~uInitialCrcB; + uFinalCrcB = ~uFinalCrcB; + + // + // convert uFinalCrcX into canonical form, so that + // uFinalCrcX = (X * x**XX) mod POLY + // + uX_nA = ComputeX_N (uSizeA); + uFinalCrcA ^= MulPoly (uX_nA, uInitialCrcA); + uX_nB = ComputeX_N (uSizeB); + uFinalCrcB ^= MulPoly (uX_nB, uInitialCrcB); + + // + // we know + // uFinalCrcA = (A * x**XX) mod POLY + // uFinalCrcB = (B * x**XX) mod POLY + // and need to compute + // uFinalCrcAB = (AB * x**XX) mod POLY = + // = ((A * x**uSizeB + B) * x**XX) mod POLY = + // = (A * x**XX) * x**uSizeB + B * x**XX mod POLY = + // = uFinalCrcB + (uFinalCrcA * x**uSizeB) mod POLY + // + + uFinalCrcAB = uFinalCrcB ^ MulPoly (uFinalCrcA, uX_nB); + + // + // Finally, adjust initial value; we have + // uFinalCrcAB = (AB * x**XX) mod POLY + // but want to have + // uFinalCrcAB = (UInitialCrcAB * x**(uSizeA + uSizeB) + AB * x**XX) mod POLY + // + + uFinalCrcAB ^= MulPoly (uInitialCrcAB, MulPoly (uX_nA, uX_nB)); + + // convert back to double NOT + uFinalCrcAB = ~uFinalCrcAB; + + return (uFinalCrcAB); + }; + + + static + void + InitializeTables ( + void + ) + { + size_t i, j; + uintxx_t k; + + _uX2N[0] = MSB >> 8; + for (i = 1; i < sizeof (_uX2N) / sizeof (_uX2N[0]); ++i) + _uX2N[i] = MulPoly (_uX2N[i-1], _uX2N[i-1]); + + for (i = 0; i < 256; ++i) + { + k = (uintxx_t) i; + for (j = 0; j < 8; ++j) + { + if (k & 1) + k = (k >> 1) ^ POLY; + else + k = (k >> 1); + } + _crc_table[i] = k; + } + } + + static + void + PrintTables ( + char *pTypeName, + char *pClassName + ) + { + size_t i, w; + + InitializeTables (); + + printf ("%s %s::_uX2N[sizeof (%s::_uX2N) / sizeof (%s::_uX2N[0])] = {", pTypeName, pClassName, 
pClassName, pClassName); + for (i = w = 0; i < sizeof (_uX2N) / sizeof (_uX2N[0]); ++i) + { + if (i != 0) + printf (","); + if (w == 0) + printf ("\n "); + printf (" 0x%0*llx", static_cast (sizeof (uintxx_t) * 2), (uint64_t) _uX2N[i]); + w = (w + sizeof (uintxx_t)) & 31; + } + printf ("\n};\n\n"); + + printf ("%s %s::_crc_table[sizeof (%s::_crc_table) / sizeof (%s::_crc_table[0])] = {", pTypeName, pClassName, pClassName, pClassName); + for (i = w = 0; i < sizeof (_crc_table) / sizeof (_crc_table[0]); ++i) + { + if (i != 0) + printf (","); + if (w == 0) + printf ("\n "); + printf (" 0x%0*llx", static_cast (sizeof (uintxx_t) * 2), (uint64_t) _crc_table[i]); + w = (w + sizeof (uintxx_t)) & 31; + } + printf ("\n};\n\n"); + }; +}; + +#define BIT64(n) (1ull << (63 - (n))) +#define crc64_POLY ( \ + BIT64(63) + BIT64(61) + BIT64(59) + BIT64(58) + BIT64(56) + BIT64(55) + BIT64(52) + BIT64(49) + BIT64(48) + BIT64(47) + \ + BIT64(46) + BIT64(44) + BIT64(41) + BIT64(37) + BIT64(36) + BIT64(34) + BIT64(32) + BIT64(31) + BIT64(28) + BIT64(26) + BIT64(23) + \ + BIT64(22) + BIT64(19) + BIT64(16) + BIT64(13) + BIT64(12) + BIT64(10) + BIT64( 9) + BIT64( 6) + BIT64( 4) + BIT64( 3) + BIT64( 0) \ +) + +#define BIT32(n) (1u << (31 - (n))) +#define crc32_POLY ( \ + BIT32(28) + BIT32(27) + BIT32(26) + BIT32(25) + BIT32(23) + BIT32(22) + BIT32(20) + BIT32(19) + \ + BIT32(18) + BIT32(14) + BIT32(13) + BIT32(11) + BIT32(10) + BIT32( 9) + BIT32( 8) + BIT32( 6) + BIT32(0) \ +) + +typedef crc_generator crc32; +typedef crc_generator crc64; + +extern crc32 PdiCrc32; +extern crc64 PdiCrc64; + +template<> +uint32_t crc32::_uX2N[sizeof (crc32::_uX2N) / sizeof (crc32::_uX2N[0])] = { + 0x00800000, 0x00008000, 0x82f63b78, 0x6ea2d55c, 0x18b8ea18, 0x510ac59a, 0xb82be955, 0xb8fdb1e7, + 0x88e56f72, 0x74c360a4, 0xe4172b16, 0x0d65762a, 0x35d73a62, 0x28461564, 0xbf455269, 0xe2ea32dc, + 0xfe7740e6, 0xf946610b, 0x3c204f8f, 0x538586e3, 0x59726915, 0x734d5309, 0xbc1ac763, 0x7d0722cc, + 0xd289cabe, 0xe94ca9bc, 
0x05b74f3f, 0xa51e1f42, 0x40000000, 0x20000000, 0x08000000, 0x00800000, + 0x00008000, 0x82f63b78, 0x6ea2d55c, 0x18b8ea18, 0x510ac59a, 0xb82be955, 0xb8fdb1e7, 0x88e56f72, + 0x74c360a4, 0xe4172b16, 0x0d65762a, 0x35d73a62, 0x28461564, 0xbf455269, 0xe2ea32dc, 0xfe7740e6, + 0xf946610b, 0x3c204f8f, 0x538586e3, 0x59726915, 0x734d5309, 0xbc1ac763, 0x7d0722cc, 0xd289cabe, + 0xe94ca9bc, 0x05b74f3f, 0xa51e1f42, 0x40000000, 0x20000000, 0x08000000, 0x00800000, 0x00008000 +}; + +template<> +uint32_t crc32::_crc_table[sizeof (crc32::_crc_table) / sizeof (crc32::_crc_table[0])] = { + 0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb, + 0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b, 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24, + 0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b, 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384, + 0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54, 0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b, + 0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a, 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35, + 0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5, 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa, + 0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45, 0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a, + 0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a, 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595, + 0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48, 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957, + 0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687, 0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198, + 0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927, 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38, + 0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8, 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7, + 0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096, 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789, + 0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859, 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46, + 0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9, 
0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6, + 0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36, 0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829, + 0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c, 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93, + 0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043, 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c, + 0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3, 0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc, + 0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c, 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033, + 0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652, 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d, + 0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d, 0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982, + 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d, 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622, + 0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2, 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed, + 0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530, 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f, + 0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff, 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0, + 0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f, 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540, + 0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90, 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f, + 0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee, 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1, + 0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321, 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e, + 0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81, 0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e, + 0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e, 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351 +}; + +template<> +uint64_t crc64::_uX2N[sizeof (crc64::_uX2N) / sizeof (crc64::_uX2N[0])] = { + 0x0080000000000000, 0x0000800000000000, 0x0000000080000000, 0x9a6c9329ac4bc9b5, + 0x10f4bb0f129310d6, 0x70f05dcea2ebd226, 0x311211205672822d, 0x2fc297db0f46c96e, + 0xca4d536fabf7da84, 
0xfb4cdc3b379ee6ed, 0xea261148df25140a, 0x59ccb2c07aa6c9b4, + 0x20b3674a839af27a, 0x2d8e1986da94d583, 0x42cdf4c20337635d, 0x1d78724bf0f26839, + 0xb96c84e0afb34bd5, 0x5d2e1fcd2df0a3ea, 0xcd9506572332be42, 0x23bda2427f7d690f, + 0x347a953232374f07, 0x1c2a807ac2a8ceea, 0x9b92ad0e14fe1460, 0x2574114889f670b2, + 0x4a84a6c45e3bf520, 0x915bbac21cd1c7ff, 0xb0290ec579f291f5, 0xcf2548505c624e6e, + 0xb154f27bf08a8207, 0xce4e92344baf7d35, 0x51da8d7e057c5eb3, 0x9fb10823f5be15df, + 0x73b825b3ff1f71cf, 0x5db436c5406ebb74, 0xfa7ed8f3ec3f2bca, 0xc4d58efdc61b9ef6, + 0xa7e39e61e855bd45, 0x97ad46f9dd1bf2f1, 0x1a0abb01f853ee6b, 0x3f0827c3348f8215, + 0x4eb68c4506134607, 0x4a46f6de5df34e0a, 0x2d855d6a1c57a8dd, 0x8688da58e1115812, + 0x5232f417fc7c7300, 0xa4080fb2e767d8da, 0xd515a7e17693e562, 0x1181f7c862e94226, + 0x9e23cd058204ca91, 0x9b8992c57a0aed82, 0xb2c0afb84609b6ff, 0x2f7160553a5ea018, + 0x3cd378b5c99f2722, 0x814054ad61a3b058, 0xbf766189fce806d8, 0x85a5e898ac49f86f, + 0x34830d11bc84f346, 0x9644d95b173c8c1c, 0x150401ac9ac759b1, 0xebe1f7f46fb00eba, + 0x8ee4ce0c2e2bd662, 0x4000000000000000, 0x2000000000000000, 0x0800000000000000 +}; + +template<> +uint64_t crc64::_crc_table[sizeof (crc64::_crc_table) / sizeof (crc64::_crc_table[0])] = { + 0x0000000000000000, 0x7f6ef0c830358979, 0xfedde190606b12f2, 0x81b31158505e9b8b, + 0xc962e5739841b68f, 0xb60c15bba8743ff6, 0x37bf04e3f82aa47d, 0x48d1f42bc81f2d04, + 0xa61cecb46814fe75, 0xd9721c7c5821770c, 0x58c10d24087fec87, 0x27affdec384a65fe, + 0x6f7e09c7f05548fa, 0x1010f90fc060c183, 0x91a3e857903e5a08, 0xeecd189fa00bd371, + 0x78e0ff3b88be6f81, 0x078e0ff3b88be6f8, 0x863d1eabe8d57d73, 0xf953ee63d8e0f40a, + 0xb1821a4810ffd90e, 0xceecea8020ca5077, 0x4f5ffbd87094cbfc, 0x30310b1040a14285, + 0xdefc138fe0aa91f4, 0xa192e347d09f188d, 0x2021f21f80c18306, 0x5f4f02d7b0f40a7f, + 0x179ef6fc78eb277b, 0x68f0063448deae02, 0xe943176c18803589, 0x962de7a428b5bcf0, + 0xf1c1fe77117cdf02, 0x8eaf0ebf2149567b, 0x0f1c1fe77117cdf0, 0x7072ef2f41224489, + 0x38a31b04893d698d, 
0x47cdebccb908e0f4, 0xc67efa94e9567b7f, 0xb9100a5cd963f206, + 0x57dd12c379682177, 0x28b3e20b495da80e, 0xa900f35319033385, 0xd66e039b2936bafc, + 0x9ebff7b0e12997f8, 0xe1d10778d11c1e81, 0x606216208142850a, 0x1f0ce6e8b1770c73, + 0x8921014c99c2b083, 0xf64ff184a9f739fa, 0x77fce0dcf9a9a271, 0x08921014c99c2b08, + 0x4043e43f0183060c, 0x3f2d14f731b68f75, 0xbe9e05af61e814fe, 0xc1f0f56751dd9d87, + 0x2f3dedf8f1d64ef6, 0x50531d30c1e3c78f, 0xd1e00c6891bd5c04, 0xae8efca0a188d57d, + 0xe65f088b6997f879, 0x9931f84359a27100, 0x1882e91b09fcea8b, 0x67ec19d339c963f2, + 0xd75adabd7a6e2d6f, 0xa8342a754a5ba416, 0x29873b2d1a053f9d, 0x56e9cbe52a30b6e4, + 0x1e383fcee22f9be0, 0x6156cf06d21a1299, 0xe0e5de5e82448912, 0x9f8b2e96b271006b, + 0x71463609127ad31a, 0x0e28c6c1224f5a63, 0x8f9bd7997211c1e8, 0xf0f5275142244891, + 0xb824d37a8a3b6595, 0xc74a23b2ba0eecec, 0x46f932eaea507767, 0x3997c222da65fe1e, + 0xafba2586f2d042ee, 0xd0d4d54ec2e5cb97, 0x5167c41692bb501c, 0x2e0934dea28ed965, + 0x66d8c0f56a91f461, 0x19b6303d5aa47d18, 0x980521650afae693, 0xe76bd1ad3acf6fea, + 0x09a6c9329ac4bc9b, 0x76c839faaaf135e2, 0xf77b28a2faafae69, 0x8815d86aca9a2710, + 0xc0c42c4102850a14, 0xbfaadc8932b0836d, 0x3e19cdd162ee18e6, 0x41773d1952db919f, + 0x269b24ca6b12f26d, 0x59f5d4025b277b14, 0xd846c55a0b79e09f, 0xa72835923b4c69e6, + 0xeff9c1b9f35344e2, 0x90973171c366cd9b, 0x1124202993385610, 0x6e4ad0e1a30ddf69, + 0x8087c87e03060c18, 0xffe938b633338561, 0x7e5a29ee636d1eea, 0x0134d92653589793, + 0x49e52d0d9b47ba97, 0x368bddc5ab7233ee, 0xb738cc9dfb2ca865, 0xc8563c55cb19211c, + 0x5e7bdbf1e3ac9dec, 0x21152b39d3991495, 0xa0a63a6183c78f1e, 0xdfc8caa9b3f20667, + 0x97193e827bed2b63, 0xe877ce4a4bd8a21a, 0x69c4df121b863991, 0x16aa2fda2bb3b0e8, + 0xf86737458bb86399, 0x8709c78dbb8deae0, 0x06bad6d5ebd3716b, 0x79d4261ddbe6f812, + 0x3105d23613f9d516, 0x4e6b22fe23cc5c6f, 0xcfd833a67392c7e4, 0xb0b6c36e43a74e9d, + 0x9a6c9329ac4bc9b5, 0xe50263e19c7e40cc, 0x64b172b9cc20db47, 0x1bdf8271fc15523e, + 0x530e765a340a7f3a, 0x2c608692043ff643, 
0xadd397ca54616dc8, 0xd2bd67026454e4b1, + 0x3c707f9dc45f37c0, 0x431e8f55f46abeb9, 0xc2ad9e0da4342532, 0xbdc36ec59401ac4b, + 0xf5129aee5c1e814f, 0x8a7c6a266c2b0836, 0x0bcf7b7e3c7593bd, 0x74a18bb60c401ac4, + 0xe28c6c1224f5a634, 0x9de29cda14c02f4d, 0x1c518d82449eb4c6, 0x633f7d4a74ab3dbf, + 0x2bee8961bcb410bb, 0x548079a98c8199c2, 0xd53368f1dcdf0249, 0xaa5d9839ecea8b30, + 0x449080a64ce15841, 0x3bfe706e7cd4d138, 0xba4d61362c8a4ab3, 0xc52391fe1cbfc3ca, + 0x8df265d5d4a0eece, 0xf29c951de49567b7, 0x732f8445b4cbfc3c, 0x0c41748d84fe7545, + 0x6bad6d5ebd3716b7, 0x14c39d968d029fce, 0x95708ccedd5c0445, 0xea1e7c06ed698d3c, + 0xa2cf882d2576a038, 0xdda178e515432941, 0x5c1269bd451db2ca, 0x237c997575283bb3, + 0xcdb181ead523e8c2, 0xb2df7122e51661bb, 0x336c607ab548fa30, 0x4c0290b2857d7349, + 0x04d364994d625e4d, 0x7bbd94517d57d734, 0xfa0e85092d094cbf, 0x856075c11d3cc5c6, + 0x134d926535897936, 0x6c2362ad05bcf04f, 0xed9073f555e26bc4, 0x92fe833d65d7e2bd, + 0xda2f7716adc8cfb9, 0xa54187de9dfd46c0, 0x24f29686cda3dd4b, 0x5b9c664efd965432, + 0xb5517ed15d9d8743, 0xca3f8e196da80e3a, 0x4b8c9f413df695b1, 0x34e26f890dc31cc8, + 0x7c339ba2c5dc31cc, 0x035d6b6af5e9b8b5, 0x82ee7a32a5b7233e, 0xfd808afa9582aa47, + 0x4d364994d625e4da, 0x3258b95ce6106da3, 0xb3eba804b64ef628, 0xcc8558cc867b7f51, + 0x8454ace74e645255, 0xfb3a5c2f7e51db2c, 0x7a894d772e0f40a7, 0x05e7bdbf1e3ac9de, + 0xeb2aa520be311aaf, 0x944455e88e0493d6, 0x15f744b0de5a085d, 0x6a99b478ee6f8124, + 0x224840532670ac20, 0x5d26b09b16452559, 0xdc95a1c3461bbed2, 0xa3fb510b762e37ab, + 0x35d6b6af5e9b8b5b, 0x4ab846676eae0222, 0xcb0b573f3ef099a9, 0xb465a7f70ec510d0, + 0xfcb453dcc6da3dd4, 0x83daa314f6efb4ad, 0x0269b24ca6b12f26, 0x7d0742849684a65f, + 0x93ca5a1b368f752e, 0xeca4aad306bafc57, 0x6d17bb8b56e467dc, 0x12794b4366d1eea5, + 0x5aa8bf68aecec3a1, 0x25c64fa09efb4ad8, 0xa4755ef8cea5d153, 0xdb1bae30fe90582a, + 0xbcf7b7e3c7593bd8, 0xc399472bf76cb2a1, 0x422a5673a732292a, 0x3d44a6bb9707a053, + 0x759552905f188d57, 0x0afba2586f2d042e, 0x8b48b3003f739fa5, 
0xf42643c80f4616dc, + 0x1aeb5b57af4dc5ad, 0x6585ab9f9f784cd4, 0xe436bac7cf26d75f, 0x9b584a0fff135e26, + 0xd389be24370c7322, 0xace74eec0739fa5b, 0x2d545fb4576761d0, 0x523aaf7c6752e8a9, + 0xc41748d84fe75459, 0xbb79b8107fd2dd20, 0x3acaa9482f8c46ab, 0x45a459801fb9cfd2, + 0x0d75adabd7a6e2d6, 0x721b5d63e7936baf, 0xf3a84c3bb7cdf024, 0x8cc6bcf387f8795d, + 0x620ba46c27f3aa2c, 0x1d6554a417c62355, 0x9cd645fc4798b8de, 0xe3b8b53477ad31a7, + 0xab69411fbfb21ca3, 0xd407b1d78f8795da, 0x55b4a08fdfd90e51, 0x2ada5047efec8728 +}; + + +#undef crc32_POLY +#undef crc64_POLY +#undef BIT64 +#undef BIT32 + +} } // end namespace diff --git a/src/core/disk_engine.cpp b/src/core/disk_engine.cpp new file mode 100644 index 0000000000..900b453c22 --- /dev/null +++ b/src/core/disk_engine.cpp @@ -0,0 +1,138 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+# include "disk_engine.h"
+# include
+# include
+# include
+# include
+
+#define __TITLE__ "disk_engine"
+
+using namespace dsn::utils;
+
+namespace dsn {
+
+//----------------- disk_engine ------------------------
+// Builds a stopped engine for `node`; no provider is attached until start().
+disk_engine::disk_engine(service_node* node)
+{
+    _request_count = 0;
+    _is_running = false;
+    _provider = nullptr;
+    _node = node;
+}
+
+disk_engine::~disk_engine()
+{
+}
+
+// Attaches `provider` and marks the engine as running.
+// Calling it again while running is a no-op (the first provider is kept).
+void disk_engine::start(aio_provider* provider)
+{
+    auto_lock l(_lock);
+    if (_is_running)
+        return;
+
+    _provider = provider;
+    _is_running = true;
+}
+
+// Forwards to the provider; the engine must have been started.
+handle_t disk_engine::open(const char* file_name, int flag, int pmode)
+{
+    return _provider->open(file_name, flag, pmode);
+}
+
+// Forwards to the provider; the engine must have been started.
+error_code disk_engine::close(handle_t hFile)
+{
+    return _provider->close(hFile);
+}
+
+// Tags the request as a read and submits it.
+void disk_engine::read(aio_task_ptr& aio)
+{
+    aio->aio()->type = AIO_Read;
+    return start_io(aio);
+}
+
+// Tags the request as a write and submits it.
+void disk_engine::write(aio_task_ptr& aio)
+{
+    aio->aio()->type = AIO_Write;
+    return start_io(aio);
+}
+
+// Submits `aio_tsk` to the provider.
+// - engine not running: the task is enqueued with ERR_SERVICE_NOT_FOUND.
+// - on_aio_call rejects the request: the task is enqueued with
+//   ERR_FILE_OPERATION_FAILED and the bookkeeping done here is undone.
+// - otherwise the provider takes over and is expected to finish through complete_io().
+void disk_engine::start_io(aio_task_ptr& aio_tsk)
+{
+    auto aio = aio_tsk->aio();
+    aio->engine = this;
+
+    {
+        auto_lock l(_lock);
+        if (!_is_running)
+        {
+            aio_tsk->enqueue(ERR_SERVICE_NOT_FOUND, 0, _node);
+            return;
+        }
+
+        _request_count++;
+    }
+
+    // paired with release_ref() in complete_io() or in the rejection path below
+    aio_tsk->add_ref();
+
+    // TODO: profiling, throttling here
+
+    if (aio_tsk->spec().on_aio_call.execute(task::get_current_task(), aio_tsk.get(), true))
+    {
+        return _provider->aio(aio_tsk);
+    }
+    else
+    {
+        // The provider never sees this request, so complete_io() will not run for it:
+        // give back the pending-request slot and the reference taken above.
+        {
+            auto_lock l(_lock);
+            _request_count--;
+        }
+
+        aio_tsk->enqueue(ERR_FILE_OPERATION_FAILED, 0, _node);
+        aio_tsk->release_ref();
+    }
+}
+
+// Finishes a request previously accepted by start_io(): logs failures, drops the
+// pending-request count, enqueues the task with the result and releases the
+// reference taken at submission. `delay_milliseconds` is currently unused.
+void disk_engine::complete_io(aio_task_ptr& aio, error_code err, uint32_t bytes, int delay_milliseconds)
+{
+    // TODO: failure injection, profiling, throttling
+
+    if (err != ERR_SUCCESS)
+    {
+        dwarn(
+            "disk operation failure with code %s, err = 0x%x, aio task id = %llx",
+            aio->spec().name,
+            err.get(),
+            aio->id()
+            );
+    }
+
+    {
+        auto_lock l(_lock);
+        _request_count--;
+    }
+
+    aio->enqueue(err, bytes, _node);
+    aio->release_ref();
+}
+
+
+} // end namespace
diff --git a/src/core/disk_engine.h b/src/core/disk_engine.h
new file mode 100644
index 0000000000..84f3ef6f07
--- /dev/null
+++ b/src/core/disk_engine.h
@@ -0,0 +1,65 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015 Microsoft Corporation
+ *
+ * -=- Robust Distributed System Nucleus (rDSN) -=-
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+# pragma once
+
+# include "service_engine.h"
+# include
+# include
+
+namespace dsn {
+
+// Per-node front end for file I/O: every operation is handed to the
+// aio_provider supplied through start().
+class disk_engine
+{
+public:
+    disk_engine(service_node* node);
+    ~disk_engine();
+
+    // attaches the provider that performs the actual I/O
+    void start(aio_provider* provider);
+
+    // asynchronous file read/write
+    handle_t open(const char* fileName, int flag, int pmode);
+    error_code close(handle_t hFile);
+    void read(aio_task_ptr& aio);
+    void write(aio_task_ptr& aio);
+
+    // forwards to the provider; dereferences _provider without a null check
+    disk_aio_ptr prepare_aio_context(aio_task* tsk) { return _provider->prepare_aio_context(tsk); }
+    service_node* node() const { return _node; }
+
+private:
+    // aio_provider is a friend so that it can reach the private members below
+    friend class aio_provider;
+    void start_io(aio_task_ptr& aio);
+    void complete_io(aio_task_ptr& aio, error_code err, uint32_t bytes, int delay_milliseconds = 0);
+
+private:
+    bool _is_running;
+    aio_provider *_provider;
+    service_node *_node;
+
+    std::recursive_mutex _lock;
+    int _request_count;
+};
+
+} // end namespace
diff --git a/src/core/end_point.cpp b/src/core/end_point.cpp
new file mode 100644
index 0000000000..533df99be9
--- /dev/null
+++ b/src/core/end_point.cpp
@@ -0,0 +1,80 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015 Microsoft Corporation
+ *
+ * -=- Robust Distributed System Nucleus (rDSN) -=-
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+# include
+
+# ifdef _WIN32
+
+
+# else
+# include
+# include
+# include
+# endif
+
+# include
+
+namespace dsn {
+
+const end_point end_point::INVALID;
+
+// Builds an end point from a dotted IPv4 string or a host name plus a port.
+// `str` is stored as `name`; `ip` receives the IPv4 address in network byte order.
+// If `str` is not a dotted address and cannot be resolved, `ip` keeps the
+// failure value returned by inet_addr.
+end_point::end_point(const char* str, uint16_t p)
+{
+    // One-time socket library initialization. std::call_once is itself safe and
+    // cheap to call on every construction, so no separate "already done" flag is
+    // kept (an unsynchronized flag read outside call_once would be a data race).
+    static std::once_flag flag;
+    std::call_once(flag, []()
+    {
+#ifdef _WIN32
+        WSADATA wsaData;
+        WSAStartup(MAKEWORD(2, 2), &wsaData);
+#endif
+    });
+
+    port = p;
+    name = std::string(str);
+
+    sockaddr_in addr;
+    memset(&addr,0,sizeof(addr));
+    addr.sin_family=AF_INET;
+
+    if ((addr.sin_addr.s_addr = inet_addr(str)) == (unsigned int)(-1))
+    {
+        // not a dotted address: fall back to a name lookup
+        hostent* hp = gethostbyname(str);
+        if (hp != 0)
+        {
+            // never write past the IPv4 address field, whatever length the resolver reports
+            size_t len = (size_t)hp->h_length;
+            if (len > sizeof(addr.sin_addr.s_addr))
+                len = sizeof(addr.sin_addr.s_addr);
+
+            memcpy((void*)&(addr.sin_addr.s_addr), (const void*)hp->h_addr, len);
+        }
+    }
+
+    // network order
+    ip = (uint32_t)(addr.sin_addr.s_addr);
+}
+
+} // end namespace
diff --git a/src/core/env_provider.cpp b/src/core/env_provider.cpp
new file mode 100644
index 0000000000..751a1d10e9
--- /dev/null
+++ b/src/core/env_provider.cpp
@@ -0,0 +1,65 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015 Microsoft Corporation
+ *
+ * -=- Robust Distributed System Nucleus (rDSN) -=-
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute,
sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+# include
+# include
+# include
+
+namespace dsn {
+
+//------------ env_provider ---------------
+
+// `inner_provider` is accepted for interface compatibility and not used here.
+env_provider::env_provider(env_provider* inner_provider)
+{
+}
+
+// Returns a pseudo-random value in the closed range [min, max].
+uint64_t env_provider::random64(uint64_t min, uint64_t max)
+{
+    uint64_t gap = max - min + 1;
+    if (gap == 0)
+    {
+        // max - min + 1 wrapped around: the caller asked for the full 64-bit range
+        /*uint64_t a,b,c,d;*/
+        return utils::get_random64();
+    }
+    else if (gap == (uint64_t)RAND_MAX + 1)
+    {
+        // the range is exactly as wide as rand()'s output; it still has to be
+        // shifted by min so that the result lands inside [min, max]
+        return min + (uint64_t)std::rand();
+    }
+    else
+    {
+        gap = static_cast(static_cast(97 * gap) * static_cast(std::rand()) / static_cast(RAND_MAX));
+        gap = gap % (max - min + 1);
+        return min + gap;
+    }
+}
+
+// Reads std::chrono::high_resolution_clock and returns the tick count of its
+// time since epoch after the duration_cast below.
+/*static*/ uint64_t env_provider::get_current_physical_time_ns()
+{
+    auto now = std::chrono::high_resolution_clock::now();
+    auto nanos = std::chrono::duration_cast(now.time_since_epoch()).count();
+    return nanos;
+}
+
+} // end namespace
diff --git a/src/core/error_code.cpp b/src/core/error_code.cpp
new file mode 100644
index 0000000000..6e572bd17f
--- /dev/null
+++ b/src/core/error_code.cpp
@@ -0,0 +1,27 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015 Microsoft Corporation
+ *
+ * -=- Robust Distributed System Nucleus (rDSN) -=-
+ *
+ * Permission is hereby granted, free of charge, to any
person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# include + diff --git a/src/core/global_config.cpp b/src/core/global_config.cpp new file mode 100644 index 0000000000..dfe6606505 --- /dev/null +++ b/src/core/global_config.cpp @@ -0,0 +1,427 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+# include
+# include
+# include
+# include
+# include
+
+#define __TITLE__ "ConfigFile"
+
+namespace dsn {
+
+// Affinity mask with one bit set per hardware thread reported by the runtime.
+// Computed in 64 bits so that machines with 31 or more hardware threads do not
+// overflow the shift; saturates to all ones at 64 or more.
+// NOTE(review): assumes worker_affinity_mask is a 64-bit integer - confirm.
+static uint64_t all_cores_affinity_mask()
+{
+    const unsigned int cores = std::thread::hardware_concurrency();
+    if (cores >= 64)
+        return ~static_cast<uint64_t>(0);
+
+    return (static_cast<uint64_t>(1) << cores) - 1;
+}
+
+// Copy construction is implemented through the assignment operator.
+threadpool_spec::threadpool_spec(const threadpool_spec& source)
+    : pool_code(source.pool_code)
+{
+    *this = source;
+}
+
+// Member-wise copy; pool_code is re-seated with reset().
+threadpool_spec& threadpool_spec::operator=(const threadpool_spec& source)
+{
+    name = source.name;
+    pool_code.reset(source.pool_code);
+    run = source.run;
+    worker_count = source.worker_count;
+    worker_priority = source.worker_priority;
+    worker_share_core = source.worker_share_core;
+    worker_affinity_mask = source.worker_affinity_mask;
+    max_input_queue_length = source.max_input_queue_length;
+    partitioned = source.partitioned;
+
+    queue_factory_name = source.queue_factory_name;
+    worker_factory_name = source.worker_factory_name;
+    queue_aspects = source.queue_aspects;
+    worker_aspects = source.worker_aspects;
+
+    admission_controller_factory_name = source.admission_controller_factory_name;
+    admission_controller_arguments = source.admission_controller_arguments;
+
+    return *this;
+}
+
+// Reads [threadpool.default] and one optional [threadpool.<CODE>] section per
+// registered thread pool code, and appends a spec to `specs` for every pool
+// whose `run` flag is true. Returns false when the default worker priority is
+// not a known value.
+bool threadpool_spec::init(configuration_ptr& config, __out_param std::vector& specs)
+{
+    /*
+    [threadpool.default]
+    worker_count = 4
+    worker_priority = THREAD_xPRIORITY_NORMAL
+    max_input_queue_length = 10000
+    partitioned = false
+    queue_aspects = xxx
+    worker_aspects = xxx
+    admission_controller_factory_name = xxx
+    admission_controller_arguments = xxx
+
+    [threadpool.THREAD_POOL_REPLICATION]
+    name = Thr.replication
+    run = true
+    worker_count = 4
+    worker_priority = THREAD_xPRIORITY_NORMAL
+    max_input_queue_length = 10000
+    partitioned = false
+    queue_aspects = xxx
+    worker_aspects = xxx
+    admission_controller_factory_name = xxx
+    admission_controller_arguments = xxx
+    */
+
+    threadpool_spec default_spec("placeholder");
+    default_spec.worker_priority = enum_from_string(config->get_string_value("threadpool.default", "worker_priority", "THREAD_xPRIORITY_NORMAL").c_str(), THREAD_xPRIORITY_INVALID);
+    if (default_spec.worker_priority == THREAD_xPRIORITY_INVALID)
+    {
+        dlog(log_level_ERROR, __TITLE__, "invalid worker priority in [threadpool.default]");
+        return false;
+    }
+    default_spec.worker_share_core = config->get_value("threadpool.default", "worker_share_core", true);
+    default_spec.worker_affinity_mask = static_cast(config->get_value("threadpool.default", "worker_affinity_mask", 0));
+    if (false == default_spec.worker_share_core && 0 == default_spec.worker_affinity_mask)
+    {
+        // dedicated cores requested without an explicit mask: use every core
+        default_spec.worker_affinity_mask = all_cores_affinity_mask();
+    }
+
+    default_spec.run = false;
+    default_spec.worker_count = config->get_value("threadpool.default", "worker_count", 1);
+    default_spec.max_input_queue_length = config->get_value("threadpool.default", "max_input_queue_length", 0xFFFFFFFFUL);
+    default_spec.partitioned = config->get_value("threadpool.default", "partitioned", false);
+    default_spec.queue_aspects = config->get_string_value_list("threadpool.default", "queue_aspects", ',');
+    default_spec.worker_aspects = config->get_string_value_list("threadpool.default", "worker_aspects", ',');
+    default_spec.admission_controller_factory_name = config->get_string_value("threadpool.default", "admission_controller_factory_name", "");
+    default_spec.admission_controller_arguments = config->get_string_value("threadpool.default", "admission_controller_arguments", "");
+
+    for (int code = 0; code <= threadpool_code::max_value(); code++)
+    {
+        // skip the invalid code and the "placeholder" code created by default_spec above
+        if (code == THREAD_POOL_INVALID || code == threadpool_code::from_string("placeholder", THREAD_POOL_INVALID))
+            continue;
+
+        std::string section_name = std::string("threadpool.") + std::string(threadpool_code::to_string(code));
+        threadpool_spec spec(default_spec);
+        spec.pool_code.reset(threadpool_code::from_string(threadpool_code::to_string(code), THREAD_POOL_INVALID));
+        spec.name = std::string(threadpool_code::to_string(code));
+        spec.run = true;
+
+        if (config->has_section(section_name.c_str()))
+        {
+            spec.name = config->get_string_value(section_name.c_str(), "name", threadpool_code::to_string(code));
+            spec.run = config->get_value(section_name.c_str(), "run", true);
+            spec.worker_count = config->get_value(section_name.c_str(), "worker_count", default_spec.worker_count);
+            spec.max_input_queue_length = config->get_value(section_name.c_str(), "max_input_queue_length", default_spec.max_input_queue_length);
+            spec.partitioned = config->get_value(section_name.c_str(), "partitioned", default_spec.partitioned);
+            spec.queue_aspects = config->get_string_value_list(section_name.c_str(), "queue_aspects", ',');
+            spec.worker_priority = enum_from_string(config->get_string_value(section_name.c_str(), "worker_priority", "THREAD_xPRIORITY_NORMAL").c_str(), THREAD_xPRIORITY_INVALID);
+
+            spec.worker_share_core = config->get_value(section_name.c_str(), "worker_share_core", true);
+            spec.worker_affinity_mask = static_cast(config->get_value(section_name.c_str(), "worker_affinity_mask", 0));
+            if (false == spec.worker_share_core && 0 == spec.worker_affinity_mask)
+            {
+                spec.worker_affinity_mask = all_cores_affinity_mask();
+            }
+
+            // empty aspect lists fall back to the defaults
+            if (spec.queue_aspects.size() == 0)
+            {
+                spec.queue_aspects = default_spec.queue_aspects;
+            }
+
+            spec.worker_aspects = config->get_string_value_list(section_name.c_str(), "worker_aspects", ',');
+            if (spec.worker_aspects.size() == 0)
+            {
+                spec.worker_aspects = default_spec.worker_aspects;
+            }
+
+            spec.admission_controller_factory_name = config->get_string_value(section_name.c_str(), "admission_controller_factory_name", default_spec.admission_controller_factory_name.c_str());
+            spec.admission_controller_arguments = config->get_string_value(section_name.c_str(), "admission_controller_arguments", default_spec.admission_controller_arguments.c_str());
+        }
+
+        if (spec.run)
+        {
+            specs.push_back(spec);
+        }
+    }
+
+    return true;
+}
+
+service_app_spec::service_app_spec(const service_app_spec& r)
+{
+    id = r.id;
+    name = r.name;
+    type = r.type;
+    arguments = r.arguments;
+    ports = r.ports;
+    delay_seconds = r.delay_seconds;
+    run = r.run;
+}
+
+// Loads one [apps.*] section. Ports that parse to 0 are dropped; the remaining
+// ports are kept sorted in ascending order. Always returns true.
+bool service_app_spec::init(const char* section, configuration_ptr& config)
+{
+    id = 0;
+    name = config->get_string_value(section, "name", "");
+    type = config->get_string_value(section, "type", "");
+    arguments = config->get_string_value(section, "arguments", "");
+
+    ports.clear();
+    std::list ports_str = config->get_string_value_list(section, "ports", ',');
+    for (auto& s : ports_str)
+    {
+        int p = atoi(s.c_str());
+        if (p != 0)
+        {
+            dassert(p > 1024, "specified port is either 0 (no listen port) or greater than 1024");
+            ports.push_back(p);
+        }
+    }
+    std::sort(ports.begin(), ports.end());
+
+    delay_seconds = config->get_value(section, "delay_seconds", 0);
+    run = config->get_value(section, "run", true);
+
+    return true;
+}
+
+network_config_spec::network_config_spec(const network_config_spec& r)
+: channel(r.channel), hdr_format(r.hdr_format)
+{
+    port = r.port;
+    factory_name = r.factory_name;
+    message_buffer_block_size = r.message_buffer_block_size;
+}
+
+// Default network settings for (port, channel).
+network_config_spec::network_config_spec(int p, rpc_channel c)
+    : channel(c), hdr_format(NET_HDR_DSN)
+{
+    port = p;
+    factory_name = "dsn::tools::asio_network_provider";
+    message_buffer_block_size = 65536;
+}
+
+// Orders by port first, then by channel.
+bool network_config_spec::operator < (const network_config_spec& r) const
+{
+    return port < r.port || (port == r.port && channel < r.channel);
+}
+
+// Stores `netcs` keyed by itself. With `force` an existing entry is overwritten;
+// without it an existing entry makes the call fail with false.
+bool service_spec::register_network(const network_config_spec& netcs, bool force)
+{
+    if (force)
+    {
+        network_configs[netcs] = netcs;
+        return true;
+    }
+    else
+    {
+        auto it = network_configs.find(netcs);
+        if (it == network_configs.end())
+        {
+            network_configs[netcs] = netcs;
+            return true;
+        }
+        else
+            return false;
+    }
+}
+
+// Loads the [core] settings, thread pool specs, task specs and every [apps.*]
+// section. An app with `count` > 1 is expanded into `count` instances; instance
+// k (1-based) uses the configured ports shifted by (k - 1) * gap, where gap is
+// the width of the app's port range. Returns false on an invalid thread pool or
+// network configuration.
+bool service_spec::init(configuration_ptr c)
+{
+    config = c;
+    tool = config->get_string_value("core", "tool", "");
+    toollets = config->get_string_value_list("core", "toollets", ',');
+    coredump_dir = config->get_string_value("core", "coredump_dir", "./coredump");
+
+    aio_factory_name = config->get_string_value("core", "aio_factory_name", "");
+    env_factory_name = config->get_string_value("core", "env_factory_name", "");
+    lock_factory_name = config->get_string_value("core", "lock_factory_name", "");
+    rwlock_factory_name = config->get_string_value("core", "rwlock_factory_name", "");
+    semaphore_factory_name = config->get_string_value("core", "semaphore_factory_name", "");
+    nfs_factory_name = config->get_string_value("core", "nfs_factory_name", "");
+
+    network_aspects = config->get_string_value_list("core", "network_aspects", ',');
+    aio_aspects = config->get_string_value_list("core", "aio_aspects", ',');
+    env_aspects = config->get_string_value_list("core", "env_aspects", ',');
+
+    lock_aspects = config->get_string_value_list("core", "lock_aspects", ',');
+    rwlock_aspects = config->get_string_value_list("core", "rwlock_aspects", ',');
+    semaphore_aspects = config->get_string_value_list("core", "semaphore_aspects", ',');
+
+    perf_counter_factory_name = config->get_string_value("core", "perf_counter_factory_name", "");
+    logging_factory_name = config->get_string_value("core", "logging_factory_name", "");
+
+    // init thread pools; a rejected configuration must not be silently accepted
+    if (!threadpool_spec::init(config, threadpool_specs))
+        return false;
+
+    // init task specs
+    task_spec::init(config);
+
+    // init service apps
+    std::vector allSectionNames;
+    config->get_all_sections(allSectionNames);
+
+    int app_id = 0;
+    for (auto it = allSectionNames.begin(); it != allSectionNames.end(); it++)
+    {
+        if (it->substr(0, strlen("apps.")) == std::string("apps."))
+        {
+            service_app_spec app;
+            app.init((*it).c_str(), config);
+
+            auto ports = app.ports;
+            auto gap = ports.size() > 0 ? (*ports.rbegin() + 1 - *ports.begin()) : 0;
+            int count = config->get_value((*it).c_str(), "count", 1);
+            std::string name = app.name;
+            for (int i = 1; i <= count; i++)
+            {
+                char buf[16];
+                snprintf(buf, sizeof(buf), "%d", i);
+                app.name = (count > 1 ? (name + buf) : name);
+                app.id = ++app_id;
+
+                // network configs
+                for (auto& p : ports)
+                {
+                    if (1 == i)
+                    {
+                        if (!build_network_spec(p))
+                            return false;
+                    }
+                    else
+                    {
+                        for (auto& cs : network_configs)
+                        {
+                            if (cs.first.port == p)
+                            {
+                                // instance i listens on p + (i - 1) * gap (see "for next
+                                // instance" below), so its configuration goes on that port
+                                auto csc = cs.first;
+                                csc.port = p + (i - 1) * gap;
+
+                                if (!register_network(csc, false))
+                                {
+                                    printf("register network configuration confliction for port %d used by %s.%d\n",
+                                        csc.port,
+                                        app.name.c_str(),
+                                        i
+                                        );
+                                    return false;
+                                }
+                            }
+                        }
+                    }
+                }
+
+                // add app
+                app_specs.push_back(app);
+
+                // for next instance
+                app.ports.clear();
+                for (auto& p : ports)
+                {
+                    app.ports.push_back(p + i * gap);
+                }
+            }
+        }
+    }
+
+    return true;
+}
+
+// Registers the network settings found in section [network.<port>], one entry
+// per channel key. A missing section means "use the defaults" and succeeds.
+// Returns false on an unknown channel, a malformed value, an unknown header
+// format, a zero buffer size or a conflicting registration.
+bool service_spec::build_network_spec(int port)
+{
+    /*
+    [network.27001]
+    ;channel = hdr_format,network_provider_name,buffer_block_size
+    RPC_CHANNEL_TCP = dsn,dsn::tools::asio_network_provider,65536
+    RPC_CHANNEL_UDP = dsn,dsn::tools::asio_network_provider,65536
+    */
+    std::stringstream ss;
+    ss << "network." << port;
+    std::string s = ss.str();
+
+    if (!config->has_section(s.c_str()))
+    {
+        // use default settings
+        return true;
+    }
+
+
+    std::vector cs;
+    config->get_all_keys(s.c_str(), cs);
+
+    for (auto& c : cs)
+    {
+        if (!rpc_channel::is_exist(c.c_str()))
+        {
+            printf("invalid rpc channel type '%s', please following the example below to define new channel:"
+                "\t\tDEFINE_CUSTOMIZED_ID(rpc_channel, RPC_CHANNEL_NEW_TYPE)"
+                "currently regisered rpc channels types are:\n", c.c_str());
+
+            for (int i = 0; i <= rpc_channel::max_value(); i++)
+            {
+                printf("\t\t%s (%u)\n", rpc_channel::to_string(i), i);
+            }
+            return false;
+        }
+
+        network_config_spec ns(port, rpc_channel(c.c_str()));
+
+        // dsn,dsn::tools::asio_network_provider,65536
+        std::list vs;
+        std::string v = config->get_string_value(s.c_str(), c.c_str(), "");
+        utils::split_args(v.c_str(), vs, ',');
+
+        if (vs.size() != 3)
+        {
+            printf("invalid network specification '%s', should be '$message-format, $network-factory,$msg-buffer-size'\n",
+                s.c_str()
+                );
+            return false;
+        }
+
+        if (!network_header_format::is_exist(vs.begin()->c_str()))
+        {
+            printf("invalid network specification, unkown message header format '%s'\n",
+                vs.begin()->c_str()
+                );
+            return false;
+        }
+
+        ns.hdr_format = network_header_format(vs.begin()->c_str());
+        ns.factory_name = *(++vs.begin());
+        ns.message_buffer_block_size = atoi(vs.rbegin()->c_str());
+
+        if (ns.message_buffer_block_size == 0)
+        {
+            printf("invalid message buffer size specified: '%s'\n", vs.rbegin()->c_str());
+            return false;
+        }
+
+        if (!register_network(ns, false))
+        {
+            printf("register network configuration confliction for port %d\n", port);
+            return false;
+        }
+    }
+    return true;
+}
+
+
+} // end namespace dsn
diff --git a/src/core/join_point.cpp b/src/core/join_point.cpp
new file mode 100644
index 0000000000..bd58a754cc
--- /dev/null
+++ b/src/core/join_point.cpp
@@ -0,0 +1,160 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015 Microsoft Corporation
+ *
+ * -=-
Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */
+# include
+# include
+
+namespace dsn
+{
+
+// The advice list is a circular doubly-linked list with `_hdr` as sentinel:
+// an empty list has _hdr.next == _hdr.prev == &_hdr.
+join_point_base::join_point_base(const char* name)
+{
+    _name = std::string(name);
+    _hdr.next = _hdr.prev = &_hdr;
+    _hdr.name = "";
+}
+
+// Inserts a new advice as the first entry of the list. Always returns true.
+bool join_point_base::put_front(void* fn, const char* name, bool is_native)
+{
+    auto e = new_entry(fn, name, is_native);
+    auto e1 = _hdr.next;
+
+    // link between the sentinel and the former first entry
+    _hdr.next = e;
+    e->prev = &_hdr;
+    e->next = e1;
+    e1->prev = e;
+
+    return true;
+}
+
+// Appends a new advice as the last entry of the list. Always returns true.
+bool join_point_base::put_back(void* fn, const char* name, bool is_native)
+{
+    auto e = new_entry(fn, name, is_native);
+    auto e1 = _hdr.prev;
+
+    e1->next = e;
+    e->next = &_hdr;
+    _hdr.prev = e;
+    e->prev = e1;
+
+    return true;
+}
+
+// Inserts a new advice `name` immediately before the existing advice `base`.
+// Asserts and returns false when `base` is not registered.
+bool join_point_base::put_before(const char* base, void* fn, const char* name, bool is_native)
+{
+    auto e0 = get_by_name(base);
+    if (e0 == nullptr)
+    {
+        dassert (false, "cannot find advice with name '%s' in '%s'", base, _name.c_str());
+        return false;
+    }
+
+    auto e = new_entry(fn, name, is_native);
+
+    auto e1 = e0->prev;
+    e1->next = e;
+    e->next = e0;
+    e0->prev = e;
+    e->prev = e1;
+
+    return true;
+}
+
+// Inserts a new advice `name` immediately after the existing advice `base`.
+// Asserts and returns false when `base` is not registered.
+bool join_point_base::put_after(const char* base, void* fn, const char* name, bool is_native)
+{
+    auto e0 = get_by_name(base);
+    if (e0 == nullptr)
+    {
+        dassert (false, "cannot find advice with name '%s' in '%s'", base, _name.c_str());
+        return false;
+    }
+
+    auto e = new_entry(fn, name, is_native);
+
+    auto e1 = e0->next;
+    e1->prev = e;
+    e->prev = e0;
+    e0->next = e;
+    e->next = e1;
+
+    return true;
+}
+
+// Replaces the function and the name of the existing advice `base` in place;
+// its position and is_native flag are kept.
+// Asserts and returns false when `base` is not registered.
+bool join_point_base::put_replace(const char* base, void* fn, const char* name)
+{
+    auto e0 = get_by_name(base);
+    if (e0 == nullptr)
+    {
+        dassert (false, "cannot find advice with name '%s' in '%s'", base, _name.c_str());
+        return false;
+    }
+    else
+    {
+        e0->func = fn;
+        e0->name = name;
+        return true;
+    }
+}
+
+// Unlinks the advice `name` from the list and frees it.
+// Asserts and returns false when `name` is not registered.
+bool join_point_base::remove(const char* name)
+{
+    auto e0 = get_by_name(name);
+    if (e0 == nullptr)
+    {
+        dassert (false, "cannot find advice with name '%s' in '%s'", name, _name.c_str());
+        return false;
+    }
+
+    e0->next->prev = e0->prev;
+    e0->prev->next = e0->next;
+
+    // entries are allocated by new_entry() and owned by the list
+    delete e0;
+
+    return true;
+}
+
+// Allocates a detached entry: its next/prev point to itself until it is linked.
+join_point_base::advice_entry* join_point_base::new_entry(void* fn, const char* name, bool is_native)
+{
+    auto e = new advice_entry();
+    e->name = std::string(name);
+    e->func = fn;
+    e->is_native = is_native;
+    e->next = e->prev = e;
+    return e;
+}
+
+// Linear search from the front; returns the first entry named `name`, or nullptr.
+join_point_base::advice_entry* join_point_base::get_by_name(const char* name)
+{
+    auto p = _hdr.next;
+    while (p != &_hdr)
+    {
+        if (strcmp(name, p->name.c_str()) == 0)
+            return p;
+
+        p = p->next;
+    }
+
+    return nullptr;
+}
+
+} // end namespace dsn
diff --git a/src/core/logging.cpp b/src/core/logging.cpp
new file mode 100644
index 0000000000..40fe2522a3
--- /dev/null
+++ b/src/core/logging.cpp
@@ -0,0 +1,53 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015 Microsoft Corporation
+ *
+ * -=- Robust Distributed System Nucleus (rDSN) -=-
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+# include
+# include "service_engine.h"
+
+namespace dsn {
+
+    // Core overload: hands the record to the logging provider of the service
+    // engine. The record is dropped when no provider is installed.
+    void logv(const char *file, const char *function, const int line, logging_level logLevel, const char* title, const char* fmt, va_list args)
+    {
+        logging_provider* provider = service_engine::instance().logging();
+        if (provider == nullptr)
+            return;
+
+        provider->logv(file, function, line, logLevel, title, fmt, args);
+    }
+
+    // printf-style overload: packs the variadic arguments and forwards them to
+    // the va_list overload.
+    void logv(const char *file, const char *function, const int line, logging_level logLevel, const char* title, const char* fmt, ...)
+    {
+        va_list packed;
+        va_start(packed, fmt);
+        logv(file, function, line, logLevel, title, fmt, packed);
+        va_end(packed);
+    }
+
+    // Title-only overload: logs with an empty format string.
+    void logv(const char *file, const char *function, const int line, logging_level logLevel, const char* title)
+    {
+        logv(file, function, line, logLevel, title, "");
+    }
+
+} // end namespace
diff --git a/src/core/message_parser.cpp b/src/core/message_parser.cpp
new file mode 100644
index 0000000000..1cd7212af3
--- /dev/null
+++ b/src/core/message_parser.cpp
@@ -0,0 +1,126 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015 Microsoft Corporation
+ *
+ * -=- Robust Distributed System Nucleus (rDSN) -=-
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial
portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+# include
+# include
+
+# define __TITLE__ "message.parser"
+
+namespace dsn {
+
+    // Starts with one empty read buffer of `buffer_block_size` bytes.
+    message_parser::message_parser(int buffer_block_size)
+        : _buffer_block_size(buffer_block_size)
+    {
+        create_new_buffer(buffer_block_size);
+    }
+
+    // Replaces the read buffer with a fresh, empty block of `sz` bytes.
+    void message_parser::create_new_buffer(int sz)
+    {
+        // The block comes from malloc, so it has to be released with free;
+        // the default shared_ptr deleter would call delete on it.
+        std::shared_ptr<char> buffer((char*)::malloc(sz), ::free);
+        dassert(buffer != nullptr, "cannot allocate a read buffer of %d bytes", sz);
+
+        _read_buffer.assign(buffer, 0, sz);
+        _read_buffer_occupied = 0;
+    }
+
+    // Records that `read_length` more bytes were written into the read buffer.
+    void message_parser::mark_read(int read_length)
+    {
+        dassert(read_length + _read_buffer_occupied <= _read_buffer.length(), "");
+        _read_buffer_occupied += read_length;
+    }
+
+    // before read
+    // Returns the position where the next `read_next` bytes should be stored.
+    // When the current buffer is too small, a new one is allocated (at least
+    // one block, or exactly as large as needed if that is bigger) and the bytes
+    // read so far are copied over.
+    void* message_parser::read_buffer_ptr(int read_next)
+    {
+        if (read_next + _read_buffer_occupied > _read_buffer.length())
+        {
+            // remember currently read content
+            auto rb = _read_buffer.range(0, _read_buffer_occupied);
+
+            // switch to next
+            if (read_next + _read_buffer_occupied > _buffer_block_size)
+                create_new_buffer(read_next + _read_buffer_occupied);
+            else
+                create_new_buffer(_buffer_block_size);
+
+            // copy
+            if (rb.length() > 0)
+            {
+                memcpy((void*)_read_buffer.data(), (const void*)rb.data(), rb.length());
+                _read_buffer_occupied = rb.length();
+            }
+
+            dassert (read_next + _read_buffer_occupied <= _read_buffer.length(), "");
+        }
+
+        return (void*)(_read_buffer.data() + _read_buffer_occupied);
+    }
+
+    // Number of bytes still free in the current read buffer.
+    int message_parser::read_buffer_capacity() const
+    {
+        return _read_buffer.length() - _read_buffer_occupied;
+    }
+
+    //-------------------- dsn message
-------------------- + + dsn_message_parser::dsn_message_parser(int buffer_block_size) + : message_parser(buffer_block_size) + { + } + + message_ptr dsn_message_parser::on_read(int read_length, __out_param int& read_next) + { + mark_read(read_length); + + if (_read_buffer_occupied >= message_header::serialized_size()) + { + int msg_sz = message_header::serialized_size() + + message_header::get_body_length((char*)_read_buffer.data()); + + // msg done + if (_read_buffer_occupied >= msg_sz) + { + auto msg_bb = _read_buffer.range(0, msg_sz); + message_ptr msg = new message(msg_bb, true); + + dassert(msg->is_right_header() && msg->is_right_body(), ""); + + _read_buffer = _read_buffer.range(msg_sz); + _read_buffer_occupied -= msg_sz; + read_next = message_header::serialized_size(); + return msg; + } + else + { + read_next = msg_sz - _read_buffer_occupied; + return nullptr; + } + } + + else + { + read_next = message_header::serialized_size() - _read_buffer_occupied; + return nullptr; + } + } +} \ No newline at end of file diff --git a/src/core/network.cpp b/src/core/network.cpp new file mode 100644 index 0000000000..09a2f7826a --- /dev/null +++ b/src/core/network.cpp @@ -0,0 +1,221 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# include +# include +# include "rpc_engine.h" + +# define __TITLE__ "rpc_session" + +namespace dsn { + + /* Client-side session: keeps the owning network, the remote address and the matcher that pairs replies with pending calls. */ rpc_client_session::rpc_client_session(network& net, const end_point& remote_addr, std::shared_ptr& matcher) + : _net(net), _remote_addr(remote_addr), _matcher(matcher) + { + } + + /* Registers the response task with the matcher (only when call is non-null) and then sends the request. */ void rpc_client_session::call(message_ptr& request, rpc_response_task_ptr& call) + { + if (call != nullptr) + { + _matcher->on_call(request, call, &_net); + } + + send(request); + } + + /* Hands this session to the owning network's client-disconnect handler. */ void rpc_client_session::on_disconnected() + { + rpc_client_session_ptr sp = this; + _net.on_client_session_disconnected(sp); + } + + /* Stamps a non-null reply with the remote (from) and local (to) addresses and passes it to the matcher; returns the matcher's result. */ bool rpc_client_session::on_recv_reply(uint64_t key, message_ptr& reply, int delay_ms) + { + if (reply != nullptr) + { + reply->header().from_address = remote_address(); + reply->header().to_address = _net.address(); + } + + return _matcher->on_recv_reply(key, reply, delay_ms); + } + + //////////////////////////////////////////////////////////////////////////////////////////////// + + /* Server-side session: keeps the owning network and the remote address. */ rpc_server_session::rpc_server_session(network& net, const end_point& remote_addr) + : _remote_addr(remote_addr), _net(net) + { + } + + /* Fills in the addresses of an incoming request, attaches this session to the message and dispatches it to the rpc engine. The sender's port is overwritten with the value carried in the header's client.port field. */ void rpc_server_session::on_recv_request(message_ptr& msg, int delay_ms) + { + msg->header().from_address = remote_address(); + msg->header().from_address.port = msg->header().client.port; + msg->header().to_address = _net.address(); + + msg->server_session().reset(this); + return _net.engine()->on_recv_request(msg, delay_ms); + } + + void
rpc_server_session::on_disconnected() + { + /* hands this session to the owning network's server-disconnect handler */ rpc_server_session_ptr sp = this; + return _net.on_server_session_disconnected(sp); + } + + //////////////////////////////////////////////////////////////////////////////////////////////// + /* Base network constructor: the parser type defaults to NET_HDR_DSN with 64 KB buffer blocks. inner_provider is not used here. */ network::network(rpc_engine* srv, network* inner_provider) + : _engine(srv), _parser_type(NET_HDR_DSN) + { + _message_buffer_block_size = 1024 * 64; + } + + /* Selects the header format and buffer block size used by parsers created afterwards through new_message_parser(). */ void network::reset_parser(network_header_format name, int message_buffer_block_size) + { + _message_buffer_block_size = message_buffer_block_size; + _parser_type = name; + } + + /* Service node of the engine that owns this network. */ service_node* network::node() const + { + return _engine->node(); + } + + /* Creates a new, empty client matcher. */ std::shared_ptr network::new_client_matcher() + { + return std::shared_ptr(new rpc_client_matcher()); + } + + /* Creates a parser for the configured header format through the factory store; asserts that the format is registered. */ std::shared_ptr network::new_message_parser() + { + message_parser * parser = utils::factory_store::create(_parser_type.to_string(), PROVIDER_TYPE_MAIN, _message_buffer_block_size); + dassert(parser, "message parser '%s' not registerd or invalid!", _parser_type.to_string()); + return std::shared_ptr(parser); + } + + /* Finds or creates the client session for the request's destination and issues the call through it. The lookup runs under the read lock first; creation re-checks under the write lock, so only the caller that actually inserted the session goes on to connect() it, after the lock is released. */ void network::call(message_ptr& request, rpc_response_task_ptr& call) + { + rpc_client_session_ptr client = nullptr; + end_point& to = request->header().to_address; + bool new_client = false; + + { + utils::auto_read_lock l(_clients_lock); + auto it = _clients.find(to); + if (it != _clients.end()) + { + client = it->second; + } + } + + if (nullptr == client.get()) + { + utils::auto_write_lock l(_clients_lock); + auto it = _clients.find(to); + if (it != _clients.end()) + { + client = it->second; + } + else + { + client = create_client_session(to); + _clients.insert(client_sessions::value_type(to, client)); + new_client = true; + } + } + + // init connection if necessary + if (new_client) + client->connect(); + + // rpc call + client->call(request, call); + } + + /* Looks up the server session registered for ep; nullptr when there is none. */ rpc_server_session_ptr network::get_server_session(const end_point& ep) + { + utils::auto_read_lock l(_servers_lock); + auto it = _servers.find(ep); +
return it != _servers.end() ? it->second : nullptr; + } + + /* Logs the new server session and registers it under its remote address. */ void network::on_server_session_accepted(rpc_server_session_ptr& s) + { + dinfo("server session %s:%d accepted", s->remote_address().name.c_str(), static_cast(s->remote_address().port)); + + utils::auto_write_lock l(_servers_lock); + _servers.insert(server_sessions::value_type(s->remote_address(), s)); + + } + + /* Removes s from the server table, but only when the entry stored for its address is this very session; logs when something was removed. */ void network::on_server_session_disconnected(rpc_server_session_ptr& s) + { + bool r = false; + { + utils::auto_write_lock l(_servers_lock); + auto it = _servers.find(s->remote_address()); + if (it != _servers.end() && it->second.get() == s.get()) + { + _servers.erase(it); + r = true; + } + } + + if (r) + { + dinfo("server session %s:%d disconnected", + s->remote_address().name.c_str(), + static_cast(s->remote_address().port)); + } + } + + /* Looks up the client session registered for ep; nullptr when there is none. */ rpc_client_session_ptr network::get_client_session(const end_point& ep) + { + utils::auto_read_lock l(_clients_lock); + auto it = _clients.find(ep); + return it != _clients.end() ? it->second : nullptr; + } + + /* Removes s from the client table, but only when the entry stored for its address is this very session; logs when something was removed. */ void network::on_client_session_disconnected(rpc_client_session_ptr& s) + { + bool r = false; + { + utils::auto_write_lock l(_clients_lock); + auto it = _clients.find(s->remote_address()); + if (it != _clients.end() && it->second.get() == s.get()) + { + _clients.erase(it); + r = true; + } + } + + if (r) + { + dinfo("client session %s:%d disconnected", s->remote_address().name.c_str(), + static_cast(s->remote_address().port)); + } + } +} diff --git a/src/core/perf_counter.cpp b/src/core/perf_counter.cpp new file mode 100644 index 0000000000..907e1b12ef --- /dev/null +++ b/src/core/perf_counter.cpp @@ -0,0 +1,27 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction,
including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# include + diff --git a/src/core/perf_counters.cpp b/src/core/perf_counters.cpp new file mode 100644 index 0000000000..c4ecd52454 --- /dev/null +++ b/src/core/perf_counters.cpp @@ -0,0 +1,117 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# include +# include + +namespace dsn { namespace utils { + +perf_counters::perf_counters(void) +{ +} + +perf_counters::~perf_counters(void) +{ +} + +/* Looks up the counter section/name. With create_if_not_exist the section and the counter are created on demand under the write lock (the counter through the registered factory), and an existing counter must have been created with the same flags. Without it the lookup runs under the read lock and returns nullptr when the section or the counter is missing. The section name is copied into a fixed 512-byte buffer with a bounded strncat, so names longer than 511 characters are truncated instead of overrunning the buffer. */ perf_counter_ptr perf_counters::get_counter(const char *section, const char *name, perf_counter_type flags, bool create_if_not_exist /*= false*/) +{ + char section_name[512] = ""; + //::GetModuleBaseNameA(::GetCurrentProcess(), ::GetModuleHandleA(nullptr), section_name, 256); + //strcat(section_name, "."); + strncat(section_name, section, sizeof(section_name) - 1); + + if (create_if_not_exist) + { + auto_write_lock l(_lock); + + auto it = _counters.find(section_name); + if (it == _counters.end()) + { + same_section_counters sc; + it = _counters.insert(all_counters::value_type(section_name, sc)).first; + } + + auto it2 = it->second.find(name); + if (it2 == it->second.end()) + { + perf_counter_ptr counter(_factory(section_name, name, flags)); + it->second.insert(same_section_counters::value_type(name, std::make_pair(counter, flags))); + return counter; + } + else + { + dassert (it2->second.second == flags, "counters with the same name %s.%s with differnt types", section_name, name); + return it2->second.first; + } + } + else + { + auto_read_lock l(_lock); + + auto it = _counters.find(section_name); + if (it == _counters.end()) + return nullptr; + + auto it2 = it->second.find(name); + if (it2 == it->second.end()) + return nullptr; + + return it2->second.first; + } +} + +/* Removes the counter section/name and drops the section once it becomes empty. Returns false when the section or the counter does not exist. The section name is copied with the same bounded strncat as in get_counter, so both functions truncate over-long names identically. */ bool perf_counters::remove_counter(const char* section, const char* name) +{ + char
section_name[512] = ""; + //::GetModuleBaseNameA(::GetCurrentProcess(), ::GetModuleHandleA(nullptr), section_name, 256); + //strcat(section_name, "."); + strncat(section_name, section, sizeof(section_name) - 1); + + auto_write_lock l(_lock); + + auto it = _counters.find(section_name); + if (it == _counters.end()) + return false; + + auto it2 = it->second.find(name); + if (it2 == it->second.end()) + return false; + + it->second.erase(it2); + if (it->second.size() == 0) + _counters.erase(it); + + return true; +} + +/* Installs the factory that get_counter uses to create new counters. */ void perf_counters::register_factory(perf_counter_factory factory) +{ + auto_write_lock l(_lock); + _factory = factory; +} + +} } // end namespace + diff --git a/src/core/rpc_engine.cpp b/src/core/rpc_engine.cpp new file mode 100644 index 0000000000..8274deae8a --- /dev/null +++ b/src/core/rpc_engine.cpp @@ -0,0 +1,448 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# ifdef _WIN32 +# include +# else +# include +# include +# endif + +# include "rpc_engine.h" +# include "service_engine.h" +# include +# include +# include +# include + +# define __TITLE__ "rpc.engine" + +namespace dsn { + + DEFINE_TASK_CODE(LPC_RPC_TIMEOUT, TASK_PRIORITY_COMMON, THREAD_POOL_DEFAULT) + + /* Task that, when executed, reports the timeout of request _id to the matcher it holds a shared_ptr to. */ class rpc_timeout_task : public task + { + public: + rpc_timeout_task(std::shared_ptr se, uint64_t id, task_spec* s) : task(LPC_RPC_TIMEOUT) + { + _s = se; + _id = id; + _spec = s; + } + + virtual void exec() + { + _s->on_rpc_timeout(_id, _spec); + } + + private: + std::shared_ptr _s; + uint64_t _id; + task_spec *_spec; + }; + + /* Completes the pending call registered under key with the given reply. The entry is removed under the lock; outside the lock the timeout task is cancelled (unless it is the currently running task) and the response task is enqueued with the reply's error code. Returns whether a pending entry was found. */ bool rpc_client_matcher::on_recv_reply(uint64_t key, message_ptr& reply, int delay_ms) + { + dassert(reply != nullptr, "cannot recieve an empty reply message"); + + error_code sys_err = reply->error(); + rpc_response_task_ptr call; + task_ptr timeout_task; + bool ret; + + { + utils::auto_lock l(_requests_lock); + auto it = _requests.find(key); + if (it != _requests.end()) + { + call = it->second.resp_task; + timeout_task = it->second.timeout_task; + _requests.erase(it); + ret = true; + } + else + { + ret = false; + } + } + + if (call != nullptr) + { + if (timeout_task != task::get_current_task()) + { + timeout_task->cancel(true); + } + + call->set_delay(delay_ms); + call->enqueue(sys_err, reply); + } + + return ret; + } + + /* Runs when the timeout task of request key fires. Does nothing when the request is no longer pending (net is only read after it was assigned from the entry). Otherwise the entry is removed and the call is either completed with ERR_TIMEOUT, when the request deadline has passed, or sent again through the same network. */ void rpc_client_matcher::on_rpc_timeout(uint64_t key, task_spec* spec) + { + rpc_response_task_ptr call; + network* net; + + { + utils::auto_lock l(_requests_lock); + auto it = _requests.find(key); + if (it != _requests.end()) + { + call = it->second.resp_task; + net = it->second.net; + _requests.erase(it); + } + else + { + return; + } + } + +
message_ptr& msg = call->get_request(); + auto nts = ::dsn::service::env::now_us(); + + // timeout already + if (nts >= msg->header().client.timeout_ts_us) + { + message_ptr null_msg(nullptr); + call->enqueue(ERR_TIMEOUT, null_msg); + } + else + { + net->call(msg, call); + } + } + + + /* Registers the pending call under the request id together with a new timeout task, then schedules that task. The delay is the time left until the request deadline, replaced by rpc_retry_interval_milliseconds when at least two retry intervals remain. NOTE(review): a deadline that has already passed, or is less than 1 ms away, yields 0 and trips the dassert below. */ void rpc_client_matcher::on_call(message_ptr& request, rpc_response_task_ptr& call, network* net) + { + message* msg = request.get(); + task_ptr timeout_task; + task_spec* spec = task_spec::get(msg->header().local_rpc_code); + message_header& hdr = msg->header(); + + timeout_task = (new rpc_timeout_task(shared_from_this(), hdr.id, spec)); + + { + utils::auto_lock l(_requests_lock); + auto pr = _requests.insert(rpc_requests::value_type(hdr.id, match_entry())); + dassert (pr.second, "the message is already on the fly!!!"); + pr.first->second.resp_task = call; + pr.first->second.timeout_task = timeout_task; + pr.first->second.net = net; + //{call, timeout_task, net } + } + + auto nts = ::dsn::service::env::now_us(); + auto tts = msg->header().client.timeout_ts_us; + + int timeout_ms = static_cast(tts > nts ?
(tts - nts) : 0)/1000; + if (timeout_ms >= spec->rpc_retry_interval_milliseconds * 2) + timeout_ms = spec->rpc_retry_interval_milliseconds; + + dassert (timeout_ms > 0, ""); + + timeout_task->set_delay(timeout_ms); + timeout_task->enqueue(); + } + + //------------------------ + /*static*/ bool rpc_engine::_message_crc_required; + + /* Stores config and node (both must be non-null) and reads network.message_crc_required, default false, into the static flag shared by all engines. */ rpc_engine::rpc_engine(configuration_ptr config, service_node* node) + : _config(config), _node(node) + { + dassert (_node != nullptr, ""); + dassert (_config != nullptr, ""); + + _is_running = false; + _local_primary_address = end_point::INVALID; + _message_crc_required = config->get_value("network", "message_crc_required", false); + } + + // + // management routines + // + /* Creates the network named by netcs through the factory store, wraps it in every configured network aspect, and starts it. Returns nullptr when start() fails. NOTE(review): the factory results are used without a null check. */ network* rpc_engine::create_network(const network_config_spec& netcs, bool client_only) + { + const service_spec& spec = service_engine::instance().spec(); + auto net = utils::factory_store::create( + netcs.factory_name.c_str(), PROVIDER_TYPE_MAIN, this, nullptr); + net->reset_parser(netcs.hdr_format, netcs.message_buffer_block_size); + + for (auto it = spec.network_aspects.begin(); + it != spec.network_aspects.end(); + it++) + { + net = utils::factory_store::create(it->c_str(), PROVIDER_TYPE_ASPECT, this, net); + } + + // start the net + error_code ret = net->start(netcs.channel, netcs.port, client_only); + if (ret == ERR_SUCCESS) + { + return net; + } + else + { + // mem leak, don't care as it halts the program + return nullptr; + } + } + + /* Starts one server network per rpc channel on port, using the explicit per-port configuration when present and the channel's default configuration otherwise. A channel with neither, or whose network fails to start, keeps a null entry. Returns false only when the port was already started. */ bool rpc_engine::start_server_port(int port) + { + // existing servers + if (_server_nets.find(port) != _server_nets.end()) + return false; + + std::vector* pnets; + std::vector nets; + auto pr = _server_nets.insert(std::map>::value_type(port, nets)); + pnets = &pr.first->second; + + pnets->resize(rpc_channel::max_value() + 1); + const service_spec& spec = service_engine::instance().spec(); + for (int i = 0; i <= rpc_channel::max_value(); i++) + { + network_config_spec cs(port, rpc_channel(rpc_channel::to_string(i))); + network*
net = nullptr; + + auto it = spec.network_configs.find(cs); + if (it != spec.network_configs.end()) + { + net = create_network(it->second, false); + } + + else + { + auto it = spec.network_default_configs.find(cs.channel); + if (it != spec.network_default_configs.end()) + { + cs.factory_name = it->second.factory_name; + cs.message_buffer_block_size = it->second.message_buffer_block_size; + + net = create_network(cs, false); + } + } + + (*pnets)[i] = net; + + // report + if (net) + { + dinfo("network started at port %u, channel = %s, fmt = %s ...", + (uint32_t)port, + rpc_channel::to_string(i), + cs.hdr_format.to_string() + ); + } + } + return true; + } + + /* Starts the engine: one client network per (header format, channel) pair that has a default configuration, then one set of server networks per port. The primary address is taken from _client_nets[0][0], with its port replaced by the first server port, or by app_id when no ports are given. NOTE(review): _client_nets[0][0] is dereferenced unconditionally, although an entry whose channel has no default configuration is never assigned (only a warning is logged) - confirm channel 0 is always configured. */ error_code rpc_engine::start(int app_id, const std::vector& ports) + { + if (_is_running) + { + return ERR_SERVICE_ALREADY_RUNNING; + } + + // local cache for shared networks with same provider and message format and port + std::map named_nets; // factory##fmt##port -> net + + // start client networks + bool r; + _client_nets.resize(network_header_format::max_value() + 1); + + const service_spec& spec = service_engine::instance().spec(); + for (int i = 0; i <= network_header_format::max_value(); i++) + { + std::vector& pnet = _client_nets[i]; + pnet.resize(rpc_channel::max_value() + 1); + for (int j = 0; j <= rpc_channel::max_value(); j++) + { + rpc_channel c = rpc_channel(rpc_channel::to_string(j)); + auto it = spec.network_default_configs.find(c); + if (it != spec.network_default_configs.end()) + { + network_config_spec cs(app_id, c); + + cs.factory_name = it->second.factory_name; + cs.message_buffer_block_size = it->second.message_buffer_block_size; + cs.hdr_format = network_header_format(network_header_format::to_string(i)); + + auto net = create_network(cs, true); + if (!net) return ERR_NETWORK_INIT_FALED; + pnet[j] = net; + } + else + { + dwarn("network client for channel %s not registered, assuming not used further", c.to_string()); + } + } + } + + // start server networks + for (auto& p : ports) + { + r =
start_server_port(p); + if (!r) return ERR_NETWORK_INIT_FALED; + } + + _local_primary_address = _client_nets[0][0]->address(); + _local_primary_address.port = ports.size() > 0 ? *ports.begin() : app_id; + + _is_running = true; + return ERR_SUCCESS; + } + + /* Registers handler under both its task-code string and its name. Asserts, and returns false, when either key is already taken. */ bool rpc_engine::register_rpc_handler(rpc_handler_ptr& handler) + { + utils::auto_write_lock l(_handlers_lock); + auto it = _handlers.find(handler->code.to_string()); + auto it2 = _handlers.find(handler->name); + if (it == _handlers.end() && it2 == _handlers.end()) + { + _handlers[handler->code.to_string()] = handler; + _handlers[handler->name] = handler; + return true; + } + else + { + dassert(false, "rpc registration confliction for '%s'", handler->code.to_string()); + return false; + } + } + + /* Removes both keys (code string and name) of the handler registered for rpc_code. The name is copied first because erasing the code entry invalidates the iterator it is read through. Returns false when the code is not registered. */ bool rpc_engine::unregister_rpc_handler(task_code rpc_code) + { + utils::auto_write_lock l(_handlers_lock); + auto it = _handlers.find(rpc_code.to_string()); + if (it == _handlers.end()) + return false; + + std::string name = it->second->name; + _handlers.erase(it); + _handlers.erase(name); + return true; + } + + /* Dispatches an incoming request: looks the handler up by the rpc_name carried in the header (under the read lock), then builds and enqueues the request task with the given delay. Requests without a registered handler are logged and dropped. */ void rpc_engine::on_recv_request(message_ptr& msg, int delay_ms) + { + rpc_handler_ptr handler; + { + utils::auto_read_lock l(_handlers_lock); + auto it = _handlers.find(msg->header().rpc_name); + if (it != _handlers.end()) + { + handler = it->second; + } + } + + if (handler != nullptr) + { + msg->header().local_rpc_code = (uint16_t)handler->code; + auto tsk = handler->handler->new_request_task(msg, node()); + tsk->set_delay(delay_ms); + tsk->enqueue(_node); + } + else + { + // TODO: warning about this msg + dwarn( + "recv unknown message with type %s from %s:%d", + msg->header().rpc_name, + msg->header().from_address.name.c_str(), + static_cast(msg->header().from_address.port) + ); + } + + //counters.RpcServerQps->increment(); + } + + /* Sends request through the client network selected by the task spec's header format and channel. Before sending, the header gets the local address, a fresh rpc id and, when the stored deadline is not in the future, a new deadline derived from the spec's rpc_timeout_milliseconds; the message is then sealed. When the on_rpc_call join point returns false the request is not sent and a non-null response task is enqueued with ERR_TIMEOUT, delayed until the deadline. */ void rpc_engine::call(message_ptr& request, rpc_response_task_ptr& call) + { + message* msg = request.get(); + + auto sp = task_spec::get(msg->header().local_rpc_code); + auto
nts_us = ::dsn::service::env::now_us(); + auto& named_nets = _client_nets[sp->rpc_call_header_format]; + network* net = named_nets[sp->rpc_call_channel]; + + dassert(nullptr != net, "network not present for rpc channel '%s' with format '%s' used by rpc %s", + sp->rpc_call_channel.to_string(), + sp->rpc_call_header_format.to_string(), + msg->header().rpc_name + ); + + msg->header().client.port = primary_address().port; + msg->header().from_address = primary_address(); + msg->header().new_rpc_id(); + + // it happens when retry the same request at the app level and timeout is not specified + if (msg->header().client.timeout_ts_us <= nts_us) + { + msg->header().client.timeout_ts_us = nts_us + + static_cast(sp->rpc_timeout_milliseconds) * 1000ULL; + } + + msg->seal(_message_crc_required); + + if (!sp->on_rpc_call.execute(task::get_current_task(), msg, call.get(), true)) + { + if (call != nullptr) + { + int delay_ms = static_cast((msg->header().client.timeout_ts_us - nts_us) / 1000); + + message_ptr nil; + call->set_delay(delay_ms); + call->enqueue(ERR_TIMEOUT, nil); + } + return; + } + + net->call(request, call); + } + + /* Seals response and sends it through the server session attached to it. Nothing is sent when no session is attached or when the on_rpc_reply join point returns false. */ void rpc_engine::reply(message_ptr& response) + { + auto s = response->server_session().get(); + if (s == nullptr) + return; + + message* msg = response.get(); + msg->seal(_message_crc_required); + + auto sp = task_spec::get(msg->header().local_rpc_code); + if (!sp->on_rpc_reply.execute(task::get_current_task(), msg, true)) + return; + + s->send(response); + } +} diff --git a/src/core/rpc_engine.h b/src/core/rpc_engine.h new file mode 100644 index 0000000000..497789de05 --- /dev/null +++ b/src/core/rpc_engine.h @@ -0,0 +1,110 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without
restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# pragma once + +# include +# include +# include +# include + +namespace dsn { + +/* Table of in-flight client requests: each entry (match_entry) holds the response task, the timeout task and the network the request was sent through. All access to the table is guarded by _requests_lock. */ class rpc_client_matcher : public std::enable_shared_from_this +{ +public: + void on_call(message_ptr& request, rpc_response_task_ptr& call, network* net); + bool on_recv_reply(uint64_t key, message_ptr& reply, int delay_ms); + +private: + friend class rpc_timeout_task; + void on_rpc_timeout(uint64_t key, task_spec* spec); + +private: + struct match_entry + { + rpc_response_task_ptr resp_task; + task_ptr timeout_task; + network* net; + }; + typedef std::map rpc_requests; + rpc_requests _requests; + std::recursive_mutex _requests_lock; +}; + +class service_node; +/* Per-node RPC front end: owns the client and server networks, the table of registered rpc handlers, and the entry points for issuing calls and sending replies. */ class rpc_engine +{ +public: + rpc_engine(configuration_ptr config, service_node* node); + + // + // management routines + // + error_code start(int app_id, const std::vector& ports); + bool start_server_port(int port); + + // + // rpc registrations + // + bool register_rpc_handler(rpc_handler_ptr& handler); + bool unregister_rpc_handler(task_code rpc_code) ; + + // + // rpc routines + // + void call(message_ptr& request,
rpc_response_task_ptr& call); + /* static: replies are routed through the server session stored in the response itself, so no engine instance is needed */ static void reply(message_ptr& response); + + // + // information inquiry + // + service_node* node() const { return _node; } + const end_point& primary_address() const { return _local_primary_address; } + +private: + friend class rpc_server_session; + void on_recv_request(message_ptr& msg, int delay_ms); + network* create_network(const network_config_spec& netcs, bool client_only); + +private: + configuration_ptr _config; + service_node *_node; + std::vector> _client_nets; // > + std::map> _server_nets; // > + std::shared_ptr _matcher; + end_point _local_primary_address; + + typedef std::map rpc_handlers; + rpc_handlers _handlers; + utils::rw_lock _handlers_lock; + + bool _is_running; + + /* shared by all engines in the process */ static bool _message_crc_required; +}; + +} // end namespace + diff --git a/src/core/rpc_message.cpp b/src/core/rpc_message.cpp new file mode 100644 index 0000000000..19fd74fbe7 --- /dev/null +++ b/src/core/rpc_message.cpp @@ -0,0 +1,273 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# include +# include +# include +# include +# include "task_engine.h" +# include +# include "crc.h" + +using namespace dsn::utils; + +#define __TITLE__ "message" + +namespace dsn { + +/* Writes the first serialized_size() bytes of this header object to writer as raw memory. */ void message_header::marshall(binary_writer& writer) +{ + writer.write((const char*)this, serialized_size()); +} + +/* Overwrites the first serialized_size() bytes of this header object with raw bytes read from reader. */ void message_header::unmarshall(binary_reader& reader) +{ + reader.read((char*)this, serialized_size()); +} + +/* Assigns a new random 64-bit rpc id. */ void message_header::new_rpc_id() +{ + rpc_id = get_random64(); +} + +/* Verifies the header CRC stored in the first four bytes of hdr. The field is zeroed while the CRC over the header is recomputed and restored afterwards, so hdr is unchanged on return. A stored value of 0 means the CRC is disabled and the header is accepted. */ /*static*/ bool message_header::is_right_header(char* hdr) +{ + int32_t crc32 = *(int32_t*)hdr; + if (crc32) + { + //dassert (*(int32_t*)data == hdr_crc32, "HeaderCrc must be put at the beginning of the buffer"); + *(int32_t*)hdr = 0; + bool r = ((uint32_t)crc32 == crc32::compute(hdr, message_header::serialized_size(), 0)); + *(int32_t*)hdr = crc32; + return r; + } + + // crc is not enabled + else + { + return true; + } +} + +std::atomic message::_id(0); + +/* Creates a write-mode message: zeroes the header fields that precede from_address and reserves room for the header in the writer through a placeholder seal. */ message::message() +{ + _reader = nullptr; + _writer = new binary_writer(); + + memset(&_msg_header, 0, FIELD_OFFSET(message_header, from_address)); + _msg_header.local_rpc_code = 0; + seal(false, true); +} + +/* Creates a read-mode message over bb. With parse_hdr the header is read from the start of the buffer; otherwise it is zero-filled. local_rpc_code is reset to 0 in both cases. */ message::message(blob bb, bool parse_hdr) +{ + _reader = new binary_reader(bb); + _writer = nullptr; + + if (parse_hdr) + { + read_header(); + _msg_header.local_rpc_code = 0; + } + else + { + memset(&_msg_header, 0, message_header::serialized_size()); + _msg_header.local_rpc_code = 0; + } +} + +/* Releases whichever of the reader and writer this message owns. */ message::~message() +{ + if (_reader != nullptr) + { + delete _reader; + _reader = nullptr; + } + + if (_writer != nullptr) + { + delete _writer; + _writer = nullptr; + } +} + +/* Builds a request for rpc_code. A timeout_milliseconds of 0 selects the task spec's rpc_timeout_milliseconds; either way the deadline is stored as an absolute time in microseconds. NOTE(review): the code name is copied into rpc_name with an unbounded strcpy - confirm task code names always fit. */ message_ptr message::create_request(task_code rpc_code, int timeout_milliseconds, int hash)
+{ + message_ptr msg(new message()); + msg->header().local_rpc_code = (uint16_t)rpc_code; + msg->header().client.hash = hash; + if (timeout_milliseconds == 0) + { + msg->header().client.timeout_ts_us = ::dsn::service::env::now_us() + + static_cast(task_spec::get(rpc_code)->rpc_timeout_milliseconds) * 1000ULL; + } + else + { + msg->header().client.timeout_ts_us = ::dsn::service::env::now_us() + + static_cast(timeout_milliseconds) * 1000ULL; + } + + const char* rpcName = rpc_code.to_string(); + strcpy(msg->header().rpc_name, rpcName); + + msg->header().id = message::new_id(); + return msg; +} + +/* Builds the response to this request: same id and rpc id, the paired rpc code, the request's rpc name with "_ACK" appended, swapped from/to addresses and the same server session. The on_create_response join point runs last. NOTE(review): "_ACK" is appended with an unbounded strcat - confirm rpc_name has room for the four extra characters. */ message_ptr message::create_response() +{ + message_ptr msg(new message()); + + msg->header().id = _msg_header.id; + msg->header().rpc_id = _msg_header.rpc_id; + + msg->header().server.error = ERR_SUCCESS.get(); + msg->header().local_rpc_code = task_spec::get(_msg_header.local_rpc_code)->rpc_paired_code; + + strcpy(msg->header().rpc_name, _msg_header.rpc_name); + strcat(msg->header().rpc_name, "_ACK"); + + msg->header().from_address = _msg_header.to_address; + msg->header().to_address = _msg_header.from_address; + + msg->_server_session = _server_session; + + // join point + task_spec::get(_msg_header.local_rpc_code)->on_create_response.execute(this, msg.get()); + + return msg; +} + +/* Finalizes a write-mode message. As a placeholder it only writes serialized_size() zero bytes to reserve the header area. Otherwise it sets body_length and marshals the header into the first buffer; with fillCrc it also computes the body CRC (unless one is already set) and the header CRC, and stores the header CRC in the first four bytes of the buffer. NOTE(review): the body CRC is computed after trimming the first buffer with range(0, length - header size), while is_right_body checks the bytes that follow the header - confirm that range's arguments make these two cover the same bytes. */ void message::seal(bool fillCrc, bool is_placeholder /*= false*/) +{ + dassert (!is_read(), "seal can only be applied to write mode messages"); + if (is_placeholder) + { + std::string dummy; + dummy.resize(_msg_header.serialized_size(), '\0'); + _writer->write((const char*)&dummy[0], _msg_header.serialized_size()); + } + else + { + header().body_length = total_size() - message_header::serialized_size(); + + if (fillCrc) + { + // compute data crc if necessary + if (header().body_crc32 == 0) + { + std::vector buffers; + _writer->get_buffers(buffers); + + buffers[0] = buffers[0].range(0, buffers[0].length() - message_header::serialized_size()); + + uint32_t crc32 = 0; + uint32_t len = 0; + for (auto
it = buffers.begin(); it != buffers.end(); it++) + { + uint32_t lcrc = crc32::compute(it->data(), it->length(), crc32); + + /*uintxx_t uInitialCrcAB, + uintxx_t uInitialCrcA, + uintxx_t uFinalCrcA, + uint64_t uSizeA, + uintxx_t uInitialCrcB, + uintxx_t uFinalCrcB, + uint64_t uSizeB*/ + crc32 = crc32::concatenate( + 0, + 0, crc32, len, + crc32, lcrc, it->length() + ); + + len += it->length(); + } + + dassert (len == (uint32_t)header().body_length, "data length is wrong"); + header().body_crc32 = crc32; + } + + blob bb = _writer->get_first_buffer(); + dassert (bb.length() >= _msg_header.serialized_size(), "the reserved blob size for message must be greater than the header size to ensure header is contiguous"); + header().hdr_crc32 = 0; + binary_writer writer(bb); + _msg_header.marshall(writer); + + header().hdr_crc32 = crc32::compute(bb.data(), message_header::serialized_size(), 0); + *(uint32_t*)bb.data() = header().hdr_crc32; + } + + // crc is not enabled + else + { + blob bb = _writer->get_first_buffer(); + dassert (bb.length() >= _msg_header.serialized_size(), "the reserved blob size for message must be greater than the header size to ensure header is contiguous"); + binary_writer writer(bb); + _msg_header.marshall(writer); + } + } +} + +/* Read mode only: checks the header CRC against the raw buffer; a header whose hdr_crc32 is 0 is accepted without a check. */ bool message::is_right_header() const +{ + dassert (is_read(), "message must be of read mode"); + if (_msg_header.hdr_crc32) + { + blob bb = _reader->get_buffer(); + return _msg_header.is_right_header((char*)bb.data()); + } + + // crc is not enabled + else + { + return true; + } +} + +/* Read mode only: checks the body CRC over the body_length bytes that follow the header; a header whose body_crc32 is 0 is accepted without a check. */ bool message::is_right_body() const +{ + dassert (is_read(), "message must be of read mode"); + if (_msg_header.body_crc32) + { + blob bb = _reader->get_buffer(); + return (uint32_t)_msg_header.body_crc32 == crc32::compute((char*)bb.data() + message_header::serialized_size(), _msg_header.body_length, 0); + } + + // crc is not enabled + else + { + return true; + } +} + +/* Read mode only: fills _msg_header from the reader. */ void message::read_header() +{ + dassert (is_read(), "message must be of read
mode"); + _msg_header.unmarshall(*_reader); +} + +} // end namespace dsn diff --git a/src/core/service_api.cpp b/src/core/service_api.cpp new file mode 100644 index 0000000000..f433336574 --- /dev/null +++ b/src/core/service_api.cpp @@ -0,0 +1,395 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE.
+ */ +# include +# include +# include "service_engine.h" +# include "task_engine.h" +# include "rpc_engine.h" +# include "disk_engine.h" +# include +# include +# include +# include +# include +# include "command_manager.h" +# include + +# define __TITLE__ "service.api" + +using namespace dsn::tools; + +namespace dsn { namespace service { + + class service_apps : public utils::singleton + { + public: + void add(service_app* app) + { + bool r = _apps.find(app->name()) == _apps.end(); + dassert(r, "apps cannot have the same name for %s", app->name().c_str()); + + _apps[app->name()] = app; + + + if (app->id() > static_cast(_apps_by_id.size())) + { + _apps_by_id.resize(app->id()); + } + dassert(_apps_by_id[app->id() - 1] == nullptr, "apps cannot have the same id %d for %s", app->id(), app->name().c_str()); + _apps_by_id[app->id() - 1] = app; + } + + service_app* get(const char* name) const + { + auto it = _apps.find(name); + if (it != _apps.end()) + return it->second; + else + return nullptr; + } + + service_app* operator [] (int id) const + { + dassert(id >= 1 && id <= static_cast(_apps_by_id.size()), "invalid app id %d", id); + return _apps_by_id[id - 1]; + } + + const std::map& get_all_apps() const { return _apps; } + + private: + std::map _apps; + std::vector _apps_by_id; + }; + + static struct _all_info_ + { + bool engine_ready; + tool_app* tool; + const std::map* apps; + configuration_ptr config; + service_engine* engine; + std::vector task_specs; + } dsn_all; + + class system_runner + { + public: + static bool run(const char* config_file, bool sleep_after_init) + { + dsn_all.engine_ready = false; + dsn_all.tool = nullptr; + dsn_all.apps = &service_apps::instance().get_all_apps(); + dsn_all.engine = &service_engine::instance(); + dsn_all.config.reset(new configuration(config_file)); + for (int i = 0; i <= task_code::max_value(); i++) + { + dsn_all.task_specs.push_back(task_spec::get(i)); + } + + service_spec spec; + if (!spec.init(dsn_all.config)) + { + 
printf("error in config file %s, exit ...\n", config_file); + return false; + } + + // pause when necessary + if (dsn_all.config->get_value("core", "pause_on_start", false)) + { +#if defined(_WIN32) + printf("\nPause for debugging (pid = %d)...\n", static_cast(::GetCurrentProcessId())); +#else + printf("\nPause for debugging (pid = %d)...\n", static_cast(getpid())); +#endif + getchar(); + } + + // setup coredump + if (!boost::filesystem::exists(spec.coredump_dir)) + { + boost::filesystem::create_directory(spec.coredump_dir); + } + std::string cdir = boost::filesystem::canonical(boost::filesystem::path(spec.coredump_dir)).string(); + utils::coredump::init(cdir.c_str()); + + // init tools + dsn_all.tool = utils::factory_store::create(spec.tool.c_str(), 0, spec.tool.c_str()); + dsn_all.tool->install(spec); + + // prepare minimum necessary + service_engine::instance().init_before_toollets(spec); + + // init toollets + for (auto it = spec.toollets.begin(); it != spec.toollets.end(); it++) + { + auto tlet = dsn::tools::internal_use_only::get_toollet(it->c_str(), 0); + dassert(tlet, "toolet not found"); + tlet->install(spec); + } + + // init provider specific system inits + dsn::tools::syste_init.execute(config_file); + + // TODO: register syste_exit execution + + // init runtime + service_engine::instance().init_after_toollets(); + + dsn_all.engine_ready = true; + + // init apps + for (auto it = spec.app_specs.begin(); it != spec.app_specs.end(); it++) + { + if (it->run) + { + service_app* app = utils::factory_store::create(it->type.c_str(), 0, &(*it)); + dassert(app != nullptr, "Cannot create service app with type name '%s'", it->type.c_str()); + service_apps::instance().add(app); + } + } + + auto apps = service_apps::instance().get_all_apps(); + for (auto it = apps.begin(); it != apps.end(); it++) + { + service_app* app = it->second; + auto node = service_engine::instance().start_node(app->spec().id, app->name(), app->spec().ports); + app->set_service_node(node); + } + 
+ // start cli if necessary + if (dsn_all.config->get_value("core", "cli_local", true)) + { + ::dsn::command_manager::instance().start_local_cli(); + } + + if (dsn_all.config->get_value("core", "cli_remote", true)) + { + ::dsn::command_manager::instance().start_remote_cli(); + } + + // start the tool + dsn_all.tool->run(); + + // + if (sleep_after_init) + { + #ifdef max + #undef max + #endif + std::this_thread::sleep_for(std::chrono::hours::max()); + } + + return true; + } + }; + +namespace system +{ + namespace internal_use_only + { + bool register_service(const char* name, service_app_factory factory) + { + return utils::factory_store::register_factory(name, factory, 0); + } + } + + bool run(const char* config_file, bool sleep_after_init) + { + return ::dsn::service::system_runner::run(config_file, sleep_after_init); + } + + bool is_ready() + { + return dsn_all.engine_ready; + } + + + configuration_ptr config() + { + return dsn_all.config; + } + + service_app* get_current_app() + { + return service_apps::instance()[task::get_current_task()->node()->id()]; + } + + const std::map& get_all_apps() + { + return service_apps::instance().get_all_apps(); + } +} + +namespace rpc +{ + const end_point& primary_address() + { + auto tsk = task::get_current_task(); + dassert(tsk != nullptr, "this function can only be invoked inside tasks"); + + return tsk->node()->rpc()->primary_address(); + } + + bool register_rpc_handler(task_code code, const char* name, rpc_server_handler* handler) + { + auto tsk = task::get_current_task(); + dassert(tsk != nullptr, "this function can only be invoked inside tasks"); + + rpc_handler_ptr h(new rpc_handler_info(code)); + h->name = std::string(name); + h->handler = handler; + + return tsk->node()->rpc()->register_rpc_handler(h); + } + + bool unregister_rpc_handler(task_code code) + { + auto tsk = task::get_current_task(); + dassert(tsk != nullptr, "this function can only be invoked inside tasks"); + + return 
tsk->node()->rpc()->unregister_rpc_handler(code); + } + + rpc_response_task_ptr call(const end_point& server, message_ptr& request, rpc_response_task_ptr callback) + { + auto tsk = task::get_current_task(); + dassert(tsk != nullptr, "this function can only be invoked inside tasks"); + + if (nullptr == callback) + { + callback.reset(new rpc_response_task_empty(request)); + } + + rpc_engine* rpc = tsk->node()->rpc(); + request->header().to_address = server; + rpc->call(request, callback); + return callback; + } + + void call_one_way(const end_point& server, message_ptr& request) + { + auto tsk = task::get_current_task(); + dassert(tsk != nullptr, "this function can only be invoked inside tasks"); + + rpc_response_task_ptr nil; + rpc_engine* rpc = tsk->node()->rpc(); + request->header().to_address = server; + rpc->call(request, nil); + } + + void reply(message_ptr& response) + { + rpc_engine::reply(response); + } +} + +namespace file +{ + handle_t open(const char* file_name, int flag, int pmode) + { + auto tsk = task::get_current_task(); + dassert(tsk != nullptr, "this function can only be invoked inside tasks"); + + return tsk->node()->disk()->open(file_name, flag, pmode); + } + + void read(handle_t hFile, char* buffer, int count, uint64_t offset, aio_task_ptr& callback) + { + auto tsk = task::get_current_task(); + dassert(tsk != nullptr, "this function can only be invoked inside tasks"); + + callback->aio()->buffer = buffer; + callback->aio()->buffer_size = count; + callback->aio()->engine = nullptr; + callback->aio()->file = hFile; + callback->aio()->file_offset = offset; + callback->aio()->type = AIO_Read; + + tsk->node()->disk()->read(callback); + } + + void write(handle_t hFile, const char* buffer, int count, uint64_t offset, aio_task_ptr& callback) + { + auto tsk = task::get_current_task(); + dassert(tsk != nullptr, "this function can only be invoked inside tasks"); + + callback->aio()->buffer = (char*)buffer; + callback->aio()->buffer_size = count; + 
callback->aio()->engine = nullptr; + callback->aio()->file = hFile; + callback->aio()->file_offset = offset; + callback->aio()->type = AIO_Write; + + tsk->node()->disk()->write(callback); + } + + error_code close(handle_t hFile) + { + auto tsk = task::get_current_task(); + dassert(tsk != nullptr, "this function can only be invoked inside tasks"); + + return tsk->node()->disk()->close(hFile); + } + + void copy_remote_files( + const end_point& remote, + std::string& source_dir, + std::vector& files, // empty for all + std::string& dest_dir, + bool overwrite, + aio_task_ptr& callback + ) + { + std::shared_ptr rci(new remote_copy_request()); + rci->source = remote; + rci->source_dir = source_dir; + rci->files = files; + rci->dest_dir = dest_dir; + rci->overwrite = overwrite; + + auto tsk = task::get_current_task(); + dassert(tsk != nullptr, "this function can only be invoked inside tasks"); + + return tsk->node()->nfs()->call(rci, callback); + } +} + +namespace env +{ + // since Epoch (1970-01-01 00:00:00 +0000 (UTC)) + uint64_t now_ns() + { + return service_engine::instance().env()->now_ns(); + } + + // generate random number [min, max] + uint64_t random64(uint64_t min, uint64_t max) + { + return service_engine::instance().env()->random64(min, max); + } +} + +}} // end namespace dsn::service diff --git a/src/core/service_app.cpp b/src/core/service_app.cpp new file mode 100644 index 0000000000..71dc70a62a --- /dev/null +++ b/src/core/service_app.cpp @@ -0,0 +1,60 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons 
to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# include +# include + +namespace dsn { namespace service { + +service_app::service_app(service_app_spec* s) +{ + _spec = *s; + + std::vector args; + utils::split_args(_spec.arguments.c_str(), args); + + int argc = static_cast(args.size()) + 1; + _args_ptr.resize(argc); + _args.resize(argc); + for (int i = 0; i < argc; i++) + { + if (0 == i) + { + _args[0] = _spec.type; + } + else + { + _args[i] = args[i-1]; + } + + _args_ptr[i] = ((char*)_args[i].c_str()); + } +} + +service_app::~service_app(void) +{ +} + +}} // end namespace dsn::service_api diff --git a/src/core/service_engine.cpp b/src/core/service_engine.cpp new file mode 100644 index 0000000000..24e99ffa4e --- /dev/null +++ b/src/core/service_engine.cpp @@ -0,0 +1,190 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * 
furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# include "service_engine.h" +# include "task_engine.h" +# include "disk_engine.h" +# include "rpc_engine.h" +# include +# include +# include +# include +# include +# include + +#define __TITLE__ "service_engine" + +using namespace dsn::utils; + +namespace dsn { + +service_node::service_node(int app_id, const std::string& app_name) +{ + _computation = nullptr; + _rpc = nullptr; + _disk = nullptr; + _nfs = nullptr; + _app_id = app_id; + _app_name = app_name; +} + +error_code service_node::start(const std::vector& ports) +{ + auto& spec = service_engine::instance().spec(); + + // init task engine + _computation = new task_engine(this); + _computation->start(spec.threadpool_specs); + dassert (_computation->is_started(), "task engine must be started at this point"); + + // init disk engine + _disk = new disk_engine(this); + aio_provider* aio = factory_store::create(spec.aio_factory_name.c_str(), PROVIDER_TYPE_MAIN, _disk, nullptr); + for (auto it = spec.aio_aspects.begin(); + it != spec.aio_aspects.end(); + it++) + { + aio = factory_store::create(it->c_str(), PROVIDER_TYPE_ASPECT, _disk, aio); + } + _disk->start(aio); + + // init rpc engine + _rpc = new rpc_engine(spec.config, this); + error_code err = _rpc->start(_app_id, ports); + if (err != ERR_SUCCESS) return err; + + // init nfs + 
if (spec.nfs_factory_name == "") + { + dwarn ("nfs not started coz no nfs_factory_name is specified, continue with no nfs"); + } + else + { + _nfs = factory_store::create(spec.nfs_factory_name.c_str(), PROVIDER_TYPE_MAIN, this); + } + + return err; +} + +////////////////////////////////////////////////////////////////////////////////////////// + +service_engine::service_engine(void) +{ + _env = nullptr; + _logging = nullptr; +} + +void service_engine::init_before_toollets(const service_spec& spec) +{ + _spec = spec; + + // init common providers (first half) + _logging = factory_store::create(spec.logging_factory_name.c_str(), PROVIDER_TYPE_MAIN, nullptr); + perf_counters::instance().register_factory(factory_store::get_factory(spec.perf_counter_factory_name.c_str(), PROVIDER_TYPE_MAIN)); +} + +void service_engine::init_after_toollets() +{ + // init common providers (second half) + _env = factory_store::create(_spec.env_factory_name.c_str(), PROVIDER_TYPE_MAIN, nullptr); + for (auto it = _spec.env_aspects.begin(); + it != _spec.env_aspects.end(); + it++) + { + _env = factory_store::create(it->c_str(), PROVIDER_TYPE_ASPECT, _env); + } +} + +void service_engine::register_system_rpc_handler(task_code code, const char* name, rpc_server_handler* handler, int port /*= -1*/) // -1 for all node +{ + rpc_handler_ptr h(new rpc_handler_info(code)); + h->name = std::string(name); + h->handler = handler; + + if (port == -1) + { + for (auto& n : _nodes_by_app_id) + { + n.second->rpc()->register_rpc_handler(h); + } + } + else + { + auto it = _nodes_by_app_port.find(port); + if (it != _nodes_by_app_port.end()) + { + it->second->rpc()->register_rpc_handler(h); + } + else + { + dwarn("cannot find service node with port %d", port); + } + } +} + +service_node* service_engine::start_node(int app_id, const std::string& app_name, const std::vector& ports) +{ + auto it = _nodes_by_app_id.find(app_id); + if (it != _nodes_by_app_id.end()) + { + return it->second; + } + else + { + for (auto p 
: ports) + { + // union to existing node if any port is shared + if (_nodes_by_app_port.find(p) != _nodes_by_app_port.end()) + { + service_node* n = _nodes_by_app_port[p]; + + dassert(false, "network port %d usage confliction for %s vs %s, please reconfig", + p, + n->name(), + app_name.c_str() + ); + } + } + + auto node = new service_node(app_id, app_name); + error_code err = node->start(ports); + dassert (err == 0, "service node start failed, err = %s", err.to_string()); + + _nodes_by_app_id[app_id] = node; + for (auto p1 : ports) + { + _nodes_by_app_port[p1] = node; + } + + return node; + } +} + +void service_engine::configuration_changed(configuration_ptr configuration) +{ + task_spec::init(configuration); +} + +} // end namespace diff --git a/src/core/service_engine.h b/src/core/service_engine.h new file mode 100644 index 0000000000..53397142df --- /dev/null +++ b/src/core/service_engine.h @@ -0,0 +1,99 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# pragma once + +# include +# include +# include +# include +# include + +namespace dsn { + +class task_engine; +class rpc_engine; +class disk_engine; +class env_provider; +class logging_provider; +class nfs_node; + +class service_node +{ +public: + service_node(int app_id, const std::string& app_name); + + task_engine* computation() const { return _computation; } + rpc_engine* rpc() const { return _rpc; } + disk_engine* disk() const { return _disk; } + nfs_node* nfs() const { return _nfs; } + + error_code start(const std::vector& ports); + + int id() const { return _app_id; } + const char* name() const { return _app_name.c_str(); } + +private: + int _app_id; + std::string _app_name; + task_engine* _computation; + rpc_engine* _rpc; + disk_engine* _disk; + nfs_node* _nfs; +}; + +class rpc_server_handler; +class service_engine : public utils::singleton +{ +public: + service_engine(); + + //ServiceMode Mode() const { return _spec.Mode; } + const service_spec& spec() const { return _spec; } + env_provider* env() const { return _env; } + logging_provider* logging() const { return _logging; } + + void init_before_toollets(const service_spec& spec); + void init_after_toollets(); + void configuration_changed(configuration_ptr configuration); + + service_node* start_node(int app_id, const std::string& app_name, const std::vector& ports); + void register_system_rpc_handler(task_code code, const char* name, rpc_server_handler* handler, int port = -1); // -1 for all nodes + +private: + service_spec _spec; + env_provider* _env; + logging_provider* _logging; + + // + typedef std::map node_engines_by_app_id; + typedef std::map node_engines_by_port; // multiple ports may share the same node + 
node_engines_by_app_id _nodes_by_app_id; + node_engines_by_port _nodes_by_app_port; +}; + +// ------------ inline impl --------------------- + +} // end namespace diff --git a/src/core/task.cpp b/src/core/task.cpp new file mode 100644 index 0000000000..fb071d9e0d --- /dev/null +++ b/src/core/task.cpp @@ -0,0 +1,447 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
// Runs the task body once, provided the task can be moved from READY to
// RUNNING; a task in any other state (e.g. already cancelled) is not executed.
// Around exec() it publishes this task as the thread's current task, fires the
// on_task_begin / on_task_end join points and wakes up any waiter.
void task::exec_internal()
{
    // expected values for the compare-exchange calls below; a failed exchange
    // overwrites its expected value with the state it observed
    task_state READY_STATE = TASK_STATE_READY;
    task_state RUNNING_STATE = TASK_STATE_RUNNING;

    if (_state.compare_exchange_strong(READY_STATE, TASK_STATE_RUNNING))
    {
        // remember the task this thread was running so it can be restored afterwards
        task* parent_task = nullptr;
        if (tls_task_info.magic == 0xdeadbeef)
        {
            parent_task = tls_task_info.current_task;
        }
        else
        {
            // thread-local info not initialized yet on this thread:
            // set_current_worker() sets the magic and clears current_task
            set_current_worker(nullptr);
        }

        tls_task_info.current_task = this;

        _spec->on_task_begin.execute(this);

        exec();

        // normal completion: exec() left the state at RUNNING
        if (_state.compare_exchange_strong(RUNNING_STATE, TASK_STATE_FINISHED))
        {
            _spec->on_task_end.execute(this);

            // signal_waiters(); [
            // inline for performance
            void* evt = _wait_event.load();
            if (evt != nullptr)
            {
                auto nevt = (utils::notify_event*)evt;
                nevt->notify();
            }
            // ]
        }

        // for timer
        // (exec() itself moved the state away from RUNNING, e.g.
        // timer_task::exec() puts it back to READY to fire again)
        else
        {
            if (!_wait_for_cancel)
            {
                // nobody is waiting to cancel: schedule the next round
                _spec->on_task_end.execute(this);
                enqueue();
            }
            else
            {
                // a cancel(true) caller is waiting: do not re-enqueue, try to
                // mark the task cancelled (result not checked) and wake the waiter
                _state.compare_exchange_strong(READY_STATE, TASK_STATE_CANCELLED);
                _spec->on_task_end.execute(this);

                // signal_waiters(); [
                // inline for performance
                void* evt = _wait_event.load();
                if (evt != nullptr)
                {
                    auto nevt = (utils::notify_event*)evt;
                    nevt->notify();
                }
                // ]
            }
        }

        tls_task_info.current_task = parent_task;
    }

    // runs whether or not the body was executed; skipped for inline-able and null tasks
    if (!_spec->allow_inline && !_is_null)
    {
        service::lock_checker::check_dangling_lock();
    }
}
utils::notify_event(); + + void* null_h = nullptr; + if (!_wait_event.compare_exchange_strong(null_h, evt)) + { + delete (utils::notify_event*)evt; + evt = _wait_event.load(); + } + } + + bool ret = (state() >= TASK_STATE_FINISHED); + if (!ret) + { + auto nevt = (utils::notify_event*)evt; + ret = (nevt->wait_for(timeout_milliseconds)); + } + + spec().on_task_wait_post.execute(task::get_current_task(), this, ret); + return ret; +} + +bool task::cancel(bool wait_until_finished) +{ + task_state READY_STATE = TASK_STATE_READY; + task *current_tsk = task::get_current_task(); + bool ret = true; + bool succ = false; + + if (current_tsk == this) + { + /*dwarn( + "task %s (id=%016llx) cannot cancel itself", + spec().name, + id() + );*/ + return false; + } + + if (_state.compare_exchange_strong(READY_STATE, TASK_STATE_CANCELLED)) + { + succ = true; + } + else + { + task_state old_state = _state.load(); + if ((old_state == TASK_STATE_CANCELLED) || (old_state == TASK_STATE_FINISHED)) + { + } + else if (wait_until_finished) + { + _wait_for_cancel = true; + bool r = wait(); + dassert(r, "wait failed, it is only possible when task runs for more than 0x0fffffff ms"); + } + else + { + ret = false; + } + } + + if (current_tsk != nullptr) + { + current_tsk->spec().on_task_cancel_post.execute(current_tsk, this, succ); + } + + if (succ) + { + spec().on_task_cancelled.execute(this); + signal_waiters(); + } + + return ret; +} + +const char* task::node_name() const +{ + return node()->name(); +} + +void task::enqueue() +{ + dassert(_node != nullptr, "service node unknown for this task"); + dassert(_spec->type != TASK_TYPE_RPC_RESPONSE, "tasks with TASK_TYPE_RPC_RESPONSE type use task::enqueue(caller_pool()) instead"); + auto pool = node()->computation()->get_pool(spec().pool_code); + enqueue(pool); +} + +void task::enqueue(task_worker_pool* pool) +{ + if (spec().type == TASK_TYPE_COMPUTE) + { + spec().on_task_enqueue.execute(task::get_current_task(), this); + } + + // fast execution + if 
(_delay_milliseconds == 0 + && (_spec->allow_inline || _spec->fast_execution_in_network_thread || _is_null) + ) + { + exec_internal(); + } + + // normal path + else + { + dassert(pool != nullptr, "pool not exist, " + "must be the case where the caller is executed in io threads " + "which is forbidden unless you explicitly set [task.%s].fast_execution_in_network_thread = true", + _spec->name + ); + + task_ptr this_(this); + pool->enqueue(this_); + } +} + +timer_task::timer_task(task_code code, uint32_t interval_milliseconds, int hash) + : task(code, hash), _interval_milliseconds(interval_milliseconds) +{ + dassert (TASK_TYPE_COMPUTE == spec().type, "this must be a computation type task"); + + // enable timer randomization to avoid lots of timers execution simultaneously + set_delay(::dsn::service::env::random32(0, interval_milliseconds)); +} + +void timer_task::exec() +{ + task_state RUNNING_STATE = TASK_STATE_RUNNING; + + bool conti = on_timer(); + + if (conti && _interval_milliseconds > 0) + { + if (_state.compare_exchange_strong(RUNNING_STATE, TASK_STATE_READY)) + { + set_delay(_interval_milliseconds); + } + } +} + +rpc_request_task::rpc_request_task(message_ptr& request, service_node* node) + : task(task_code(request->header().local_rpc_code), request->header().client.hash, node), + _request(request) +{ + + dbg_dassert (TASK_TYPE_RPC_REQUEST == spec().type, "task type must be RPC_REQUEST"); +} + +void rpc_request_task::enqueue(service_node* node) +{ + spec().on_rpc_request_enqueue.execute(this); + task::enqueue(node->computation()->get_pool(spec().pool_code)); +} + +void rpc_response_task::exec() +{ + on_response(error(), _request, _response); +} + +rpc_response_task::rpc_response_task(message_ptr& request, int hash) + : task(task_spec::get(request->header().local_rpc_code)->rpc_paired_code, + hash == 0 ? 
request->header().client.hash : hash) +{ + set_error_code(ERR_IO_PENDING); + + dbg_dassert (TASK_TYPE_RPC_RESPONSE == spec().type, "task must be of RPC_RESPONSE type"); + + _request = request; + _caller_pool = task::get_current_worker() ? + task::get_current_worker()->pool() : nullptr; +} + +void rpc_response_task::enqueue(error_code err, message_ptr& reply) +{ + set_error_code(err); + _response = (err == ERR_SUCCESS ? reply : nullptr); + + if (spec().on_rpc_response_enqueue.execute(this, true)) + { + task::enqueue(_caller_pool); + } +} + +rpc_response_task_empty::rpc_response_task_empty(message_ptr& request, int hash) + : rpc_response_task(request, hash) +{ + _is_null = true; +} + +aio_task::aio_task(task_code code, int hash) + : task(code, hash) +{ + dassert (TASK_TYPE_AIO == spec().type, "task must be of AIO type"); + set_error_code(ERR_IO_PENDING); + + _aio = node()->disk()->prepare_aio_context(this); +} + +void aio_task::exec() +{ + on_completed(error(), _transferred_size); +} + +void aio_task::enqueue(error_code err, uint32_t transferred_size, service_node* node) +{ + set_error_code(err); + _transferred_size = transferred_size; + + spec().on_aio_enqueue.execute(this); + + if (node != nullptr) + { + task::enqueue(node->computation()->get_pool(spec().pool_code)); + } + else + { + task::enqueue(); + } +} + +} // end namespace diff --git a/src/core/task_code.cpp b/src/core/task_code.cpp new file mode 100644 index 0000000000..8e9bc8fafc --- /dev/null +++ b/src/core/task_code.cpp @@ -0,0 +1,205 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of 
the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# include +# include +# include +# include +# include + +#define __TITLE__ "task_spec" + +namespace dsn { + +task_code::task_code(const char* xxx, task_type type, threadpool_code pool, task_priority pri, int rpcPairedCode) + : dsn::utils::customized_id(xxx) +{ + if (!dsn::utils::singleton_vector_store::instance().contains(*this)) + { + task_spec* spec = new task_spec(*this, xxx, type, pool, rpcPairedCode, pri); + dsn::utils::singleton_vector_store::instance().put(*this, spec); + } +} + +task_spec* task_spec::get(int code) +{ + return dsn::utils::singleton_vector_store::instance().get(code); +} + +task_spec::task_spec(int code, const char* name, task_type type, threadpool_code pool, int paired_code, task_priority pri) + : code(code), name(name), type(type), pool_code(pool), rpc_paired_code(paired_code), priority(pri), + on_task_enqueue((std::string(name) + std::string(".enqueue")).c_str()), + on_task_begin((std::string(name) + std::string(".begin")).c_str()), + on_task_end((std::string(name) + std::string(".end")).c_str()), + on_task_wait_pre((std::string(name) + std::string(".wait.pre")).c_str()), + on_task_wait_post((std::string(name) + std::string(".wait.post")).c_str()), + on_task_cancel_post((std::string(name) + 
std::string(".cancel.post")).c_str()), + on_task_cancelled((std::string(name) + std::string(".cancelled")).c_str()), + on_aio_call((std::string(name) + std::string(".aio.call")).c_str()), + on_aio_enqueue((std::string(name) + std::string(".aio.enqueue")).c_str()), + on_rpc_call((std::string(name) + std::string(".rpc.call")).c_str()), + on_rpc_request_enqueue((std::string(name) + std::string(".rpc.request.enqueue")).c_str()), + on_rpc_reply((std::string(name) + std::string(".rpc.reply")).c_str()), + on_rpc_response_enqueue((std::string(name) + std::string(".rpc.response.enqueue")).c_str()), + on_create_response((std::string(name) + std::string(".create.response")).c_str()), + rpc_call_channel(RPC_CHANNEL_TCP), + rpc_call_header_format(NET_HDR_DSN) +{ + if (paired_code != 0) + { + task_spec* pc_spec = task_spec::get(paired_code); + pc_spec->rpc_paired_code.reset(task_code(code)); + } + + dassert ( + strlen(name) <= MAX_TASK_CODE_NAME_LENGTH, + "task code name '%s' is too long: length must not be larger than MAX_TASK_CODE_NAME_LENGTH (%u)", + name, MAX_TASK_CODE_NAME_LENGTH + ); + + rejection_handler = nullptr; + + // TODO: config for following values + rpc_call_channel = RPC_CHANNEL_TCP; + rpc_timeout_milliseconds = 5 * 1000; // 5 seconds + rpc_retry_interval_milliseconds = 3 * 1000; // 3 seconds +} + +bool task_spec::init(configuration_ptr config) +{ + /* + [task.default] + is_trace = false + is_profile = false + + [task.RPC_PREPARE] + pool_code = THREAD_POOL_REPLICATION + priority = TASK_PRIORITY_HIGH + is_trace = true + is_profile = true + */ + + task_spec default_spec(0, "placeholder", TASK_TYPE_COMPUTE, THREAD_POOL_DEFAULT, 0, TASK_PRIORITY_COMMON); + default_spec.priority = enum_from_string(config->get_string_value("task.default", "priority", "TASK_PRIORITY_COMMON").c_str(), TASK_PRIORITY_INVALID); + if (default_spec.priority == TASK_PRIORITY_INVALID) + { + derror("invalid task priority in [task.default]"); + return false; + } + + auto cn = 
config->get_string_value("task.default", "rpc_call_channel", RPC_CHANNEL_TCP.to_string()); + if (!rpc_channel::is_exist(cn.c_str())) + { + derror("invalid task rpc_call_channel in [task.default]"); + return false; + } + + auto fmt = config->get_string_value("task.default", "rpc_call_header_format", NET_HDR_DSN.to_string()); + if (!network_header_format::is_exist(fmt.c_str())) + { + derror("invalid task rpc_call_header_format in [task.default]"); + return false; + } + + default_spec.allow_inline = config->get_value("task.default", "allow_inline", false); + default_spec.fast_execution_in_network_thread = config->get_value("task.default", "fast_execution_in_network_thread", false); + default_spec.rpc_call_channel = rpc_channel::from_string(cn.c_str(), RPC_CHANNEL_TCP); + default_spec.rpc_call_header_format = network_header_format::from_string(fmt.c_str(), NET_HDR_DSN); + default_spec.rpc_timeout_milliseconds = config->get_value("task.default", "rpc_timeout_milliseconds", default_spec.rpc_timeout_milliseconds); + default_spec.rpc_retry_interval_milliseconds = config->get_value("task.default", "rpc_retry_interval_milliseconds", default_spec.rpc_retry_interval_milliseconds); + + for (int code = 0; code <= task_code::max_value(); code++) + { + if (code == TASK_CODE_INVALID) + continue; + + std::string section_name = std::string("task.") + std::string(task_code::to_string(code)); + task_spec* spec = task_spec::get(code); + dassert (spec != nullptr, "task_spec cannot be null"); + + if (config->has_section(section_name.c_str())) + { + auto pool = threadpool_code::from_string(config->get_string_value(section_name.c_str(), "pool_code", spec->pool_code.to_string()).c_str(), THREAD_POOL_INVALID); + if (pool == THREAD_POOL_INVALID) + { + derror("invalid ThreadPool in [%s]", section_name.c_str()); + return false; + } + + auto pri = enum_from_string(config->get_string_value(section_name.c_str(), "priority", enum_to_string(spec->priority)).c_str(), TASK_PRIORITY_INVALID); + if (pri 
== TASK_PRIORITY_INVALID) + { + derror("invalid priority in [%s]", section_name.c_str()); + return false; + } + + auto cn = config->get_string_value(section_name.c_str(), "rpc_call_channel", default_spec.rpc_call_channel.to_string()); + if (!rpc_channel::is_exist(cn.c_str())) + { + derror("invalid task rpc_call_channel in [%s]", section_name.c_str()); + return false; + } + + auto fmt = config->get_string_value(section_name.c_str(), "rpc_call_header_format", default_spec.rpc_call_header_format.to_string()); + if (!network_header_format::is_exist(fmt.c_str())) + { + derror("invalid task rpc_call_header_format in [%s]", section_name.c_str()); + return false; + } + + spec->pool_code.reset(pool); + spec->priority = pri; + spec->allow_inline = (spec->type != TASK_TYPE_RPC_RESPONSE + && spec->type != TASK_TYPE_RPC_REQUEST + && config->get_value(section_name.c_str(), "allow_inline", default_spec.allow_inline)); + spec->fast_execution_in_network_thread = + ((spec->type == TASK_TYPE_RPC_RESPONSE || spec->type == TASK_TYPE_RPC_REQUEST) + && config->get_value(section_name.c_str(), "fast_execution_in_network_thread", default_spec.fast_execution_in_network_thread)); + spec->rpc_call_channel = rpc_channel::from_string(cn.c_str(), RPC_CHANNEL_TCP); + spec->rpc_call_header_format = network_header_format::from_string(fmt.c_str(), NET_HDR_DSN); + spec->rpc_timeout_milliseconds = config->get_value(section_name.c_str(), "rpc_timeout_milliseconds", default_spec.rpc_timeout_milliseconds); + spec->rpc_retry_interval_milliseconds = config->get_value(section_name.c_str(), "rpc_retry_interval_milliseconds", default_spec.rpc_retry_interval_milliseconds); + } + else + { + spec->priority = default_spec.priority; + spec->allow_inline = (spec->type != TASK_TYPE_RPC_RESPONSE + && spec->type != TASK_TYPE_RPC_REQUEST + && default_spec.allow_inline + ); + spec->fast_execution_in_network_thread = + ((spec->type == TASK_TYPE_RPC_RESPONSE || spec->type == TASK_TYPE_RPC_REQUEST) + && 
default_spec.fast_execution_in_network_thread); + spec->rpc_call_channel = default_spec.rpc_call_channel; + spec->rpc_call_header_format = default_spec.rpc_call_header_format; + spec->rpc_timeout_milliseconds = default_spec.rpc_timeout_milliseconds; + spec->rpc_retry_interval_milliseconds = default_spec.rpc_retry_interval_milliseconds; + } + } + + return true; +} + +} // end namespace diff --git a/src/core/task_engine.cpp b/src/core/task_engine.cpp new file mode 100644 index 0000000000..b40e0352db --- /dev/null +++ b/src/core/task_engine.cpp @@ -0,0 +1,226 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +# include "task_engine.h" +# include +# include +# include + +#define __TITLE__ "task_engine" + +using namespace dsn::utils; + +namespace dsn { + +task_worker_pool::task_worker_pool(const threadpool_spec& opts, task_engine* owner) + : _spec(opts), _owner(owner), _node(owner->node()) +{ + _is_running = false; + _pending_task_counter = dsn::utils::perf_counters::instance().get_counter((_spec.name + std::string(".PendingTask#")).c_str(), COUNTER_TYPE_NUMBER, true); +} + +void task_worker_pool::start() +{ + if (_is_running) + return; + + int qCount = _spec.partitioned ? _spec.worker_count : 1; + for (int i = 0; i < qCount; i++) + { + task_queue* q = factory_store::create(_spec.queue_factory_name.c_str(), PROVIDER_TYPE_MAIN, this, i, nullptr); + for (auto it = _spec.queue_aspects.begin(); + it != _spec.queue_aspects.end(); + it++) + { + q = factory_store::create(it->c_str(), PROVIDER_TYPE_ASPECT, this, i, q); + } + _queues.push_back(q); + + if (_spec.admission_controller_factory_name != "") + { + admission_controller* controller = factory_store::create(_spec.admission_controller_factory_name.c_str(), + PROVIDER_TYPE_MAIN, + q, _spec.admission_controller_arguments.c_str()); + + if (controller) + { + _controllers.push_back(controller); + q->set_controller(controller); + } + else + { + _controllers.push_back(nullptr); + } + } + else + { + _controllers.push_back(nullptr); + } + } + + for (int i = 0; i < _spec.worker_count; i++) + { + auto q = _queues[qCount == 1 ? 
0 : i]; + task_worker* worker = factory_store::create(_spec.worker_factory_name.c_str(), PROVIDER_TYPE_MAIN, this, q, i, nullptr); + for (auto it = _spec.worker_aspects.begin(); + it != _spec.worker_aspects.end(); + it++) + { + worker = factory_store::create(it->c_str(), PROVIDER_TYPE_ASPECT, this, q, i, worker); + } + task_worker::on_create.execute(worker); + + _workers.push_back(worker); + worker->start(); + } + + _is_running = true; +} + +void task_worker_pool::enqueue(task_ptr& task) +{ + dassert (task->spec().pool_code == spec().pool_code || task->spec().type == TASK_TYPE_RPC_RESPONSE, "Invalid thread pool used"); + + if (_is_running) + { + int idx = (_spec.partitioned ? task->hash() % _queues.size() : 0); + task_queue* q = _queues[idx]; + if (task->delay_milliseconds() == 0) + { + auto controller = _controllers[idx]; + if (controller != nullptr) + { + while (!controller->is_task_accepted(task)) + { + // any customized rejection handler? + if (task->spec().rejection_handler != nullptr) + { + task->spec().rejection_handler(task.get(), controller); + + dlog(log_level_DEBUG, __TITLE__, + "timer_task %s (%016llx) is rejected", + task->spec().name, + task->id() + ); + + return; + } + + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } + } + else if (task->spec().type == TASK_TYPE_RPC_REQUEST && _spec.max_input_queue_length != 0xFFFFFFFFUL) + { + while ((uint32_t)q->count() >= _spec.max_input_queue_length) + { + // any customized rejection handler? 
+ if (task->spec().rejection_handler != nullptr) + { + task->spec().rejection_handler(task.get(), controller); + + dlog(log_level_DEBUG, __TITLE__, + "task %s (%016llx) is rejected because the target queue is full", + task->spec().name, + task->id() + ); + + return; + } + + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } + } + + _pending_task_counter->increment(); + } + + return q->enqueue(task); + } + else + { + dassert (false, "worker pool %s must be started before enqueue task %s", + spec().name.c_str(), + task->spec().name + ); + } +} +// +//void task_worker_pool::on_dequeue(int count) +//{ +// _pending_task_counter->Subtract((unsigned long long)count); +//} + +bool task_worker_pool::shared_same_worker_with_current_task(task* tsk) const +{ + task* current = task::get_current_task(); + if (nullptr != current) + { + if (current->spec().pool_code != tsk->code()) + return false; + else if (_workers.size() == 1) + return true; + else if (_spec.partitioned) + { + int sz = static_cast(_workers.size()); + return current->hash() % sz == tsk->hash() % sz; + } + else + { + return false; + } + } + else + { + return false; + } +} + +task_engine::task_engine(service_node* node) +{ + _is_running = false; + _node = node; +} + +void task_engine::start(const std::vector& spec) +{ + if (_is_running) + return; + + // init pools + _pools.resize(threadpool_code::max_value() + 1); + for (auto it = spec.begin(); it != spec.end(); it++) + { + if ((*it).run) + { + auto workerPool = new task_worker_pool(*it, this); + workerPool->start(); + _pools[workerPool->spec().pool_code] = workerPool; + } + } + + _is_running = true; +} + +} // end namespace diff --git a/src/core/task_engine.h b/src/core/task_engine.h new file mode 100644 index 0000000000..d9bf922d27 --- /dev/null +++ b/src/core/task_engine.h @@ -0,0 +1,102 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is 
hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +# pragma once + +# include "service_engine.h" +# include +# include +# include +# include + +namespace dsn { + +class task_engine; +class task_worker_pool; +class task_worker; + +// +// a task_worker_pool is a set of TaskWorkers share the same configs; +// they may even share the same task_queue when partitioned == true +// +class task_worker_pool +{ +public: + task_worker_pool(const threadpool_spec& opts, task_engine* owner); + + // service management + void start(); + + // task procecessing + void enqueue(task_ptr& task); + void on_dequeue(int count); + + // inquery + const threadpool_spec& spec() const { return _spec; } + bool shared_same_worker_with_current_task(task* task) const; + task_engine* engine() const { return _owner; } + service_node* node() const { return _node; } + +private: + threadpool_spec _spec; + task_engine* _owner; + service_node* _node; + + std::vector _workers; + std::vector _queues; + std::vector _controllers; + + bool _is_running; + perf_counter_ptr _pending_task_counter; +}; + +class task_engine +{ +public: + task_engine(service_node* node); + + // + // service management routines + // + void start(const std::vector& spec); + + // + // task management routines + // + task_worker_pool* get_pool(int code) const { return _pools[code]; } + + bool is_started() const { return _is_running; } + + service_node* node() const { return _node; } + +private: + std::vector _pools; + volatile bool _is_running; + service_node* _node; +}; + +// -------------------- inline implementation ---------------------------- + +} // end namespace diff --git a/src/core/task_queue.cpp b/src/core/task_queue.cpp new file mode 100644 index 0000000000..12288db30c --- /dev/null +++ b/src/core/task_queue.cpp @@ -0,0 +1,49 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and 
associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# include +# include "task_engine.h" +# include +# include +# define __TITLE__ "task_queue" + +namespace dsn { + +task_queue::task_queue(task_worker_pool* pool, int index, task_queue* inner_provider) : _pool(pool), _controller(nullptr) +{ + char num[30]; + sprintf(num, "%u", index); + _name = pool->spec().name + '.'; + _name.append(num); + _qps_counter = dsn::utils::perf_counters::instance().get_counter((_name + std::string(".qps")).c_str(), COUNTER_TYPE_RATE, true); +} + +//void task_queue::on_dequeue(int count) +//{ +// _qps_counter->add((unsigned long long)count); +// _pool->on_dequeue(count); +//} + +} diff --git a/src/core/task_worker.cpp b/src/core/task_worker.cpp new file mode 100644 index 0000000000..0a6b4d9801 --- /dev/null +++ b/src/core/task_worker.cpp @@ -0,0 +1,220 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a 
copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +# include +# include "task_engine.h" +# include + +# ifdef _WIN32 + +# else +//# include +# endif + + +# define __TITLE__ "task.worker" + +namespace dsn { + +join_point task_worker::on_start("task_worker::on_start"); +join_point task_worker::on_create("task_worker::on_create"); + +task_worker::task_worker(task_worker_pool* pool, task_queue* q, int index, task_worker* inner_provider) +{ + _owner_pool = pool; + _input_queue = q; + _index = index; + + char name[256]; + sprintf(name, "%5s.%s.%u", pool->node()->name(), pool->spec().name.c_str(), index); + _name = std::string(name); + _is_running = false; + + _thread = nullptr; +} + +task_worker::~task_worker() +{ + stop(); +} + +void task_worker::start() +{ + if (_is_running) + return; + + _is_running = true; + + _thread = new std::thread(std::bind(&task_worker::run_internal, this)); + + _started.wait(); +} + +void task_worker::stop() +{ + if (!_is_running) + return; + + _is_running = false; + + _thread->join(); + delete _thread; + _thread = nullptr; + + _is_running = false; +} + +void task_worker::set_name() +{ +# ifdef _WIN32 + + #ifndef MS_VC_EXCEPTION + #define MS_VC_EXCEPTION 0x406D1388 + #endif + + typedef struct tagTHREADNAME_INFO + { + uint32_t dwType; // Must be 0x1000. + LPCSTR szName; // Pointer to name (in user addr space). + uint32_t dwThreadID; // Thread ID (-1=caller thread). + uint32_t dwFlags; // Reserved for future use, must be zero. 
+ }THREADNAME_INFO; + + THREADNAME_INFO info; + info.dwType = 0x1000; + info.szName = name().c_str(); + info.dwThreadID = (uint32_t)-1; + info.dwFlags = 0; + + __try + { + ::RaiseException (MS_VC_EXCEPTION, 0, sizeof(info)/sizeof(uint32_t), (ULONG_PTR*)&info); + } + __except(EXCEPTION_CONTINUE_EXECUTION) + { + } + +# else +// prctl(PR_SET_NAME, name(), 0, 0, 0) +# endif +} + +void task_worker::set_priority(worker_priority_t pri) +{ +# ifdef _WIN32 + static int g_thread_priority_map[] = + { + THREAD_PRIORITY_LOWEST, + THREAD_PRIORITY_BELOW_NORMAL, + THREAD_PRIORITY_NORMAL, + THREAD_PRIORITY_ABOVE_NORMAL, + THREAD_PRIORITY_HIGHEST + }; + + C_ASSERT(ARRAYSIZE(g_thread_priority_map) == THREAD_xPRIORITY_COUNT); + + ::SetThreadPriority(_thread->native_handle(), g_thread_priority_map[(pool_spec().worker_priority)]); +# else +//# error "not implemented" +# endif +} + +void task_worker::set_affinity(uint64_t affinity) +{ +# ifdef _WIN32 + ::SetThreadAffinityMask(_thread->native_handle(), static_cast(affinity)); +# else +//# error "not implemented" +# endif +} + +void task_worker::run_internal() +{ + while (_thread == nullptr) + { + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } + + task::set_current_worker(this); + + set_name(); + set_priority(pool_spec().worker_priority); + + if (true == pool_spec().worker_share_core) + { + if (pool_spec().worker_affinity_mask > 0) + { + set_affinity(pool_spec().worker_affinity_mask); + } + } + else + { + uint64_t current_mask = pool_spec().worker_affinity_mask; + for (int i = 0; i < _index; ++i) + { + current_mask &= current_mask - 1; + if (0 == current_mask) + { + current_mask = pool_spec().worker_affinity_mask; + } + } + current_mask -= current_mask & current_mask - 1; + + set_affinity(current_mask); + } + + _started.notify(); + + on_start.execute(this); + + loop(); +} + +void task_worker::loop() +{ + task_queue* q = queue(); + + //try { + while (_is_running) + { + task_ptr task = q->dequeue(); + if (task != nullptr) + { 
+ task->exec_internal(); + } + } + /*} + catch (std::exception& ex) + { + dassert (false, "%s: unhandled exception '%s'", name().c_str(), ex.what()); + }*/ +} + +const threadpool_spec& task_worker::pool_spec() const +{ + return pool()->spec(); +} + +} // end namespace diff --git a/src/core/tool_api.cpp b/src/core/tool_api.cpp new file mode 100644 index 0000000000..cc882ae75a --- /dev/null +++ b/src/core/tool_api.cpp @@ -0,0 +1,205 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +# include +# include +# include "service_engine.h" +# include +# include + +namespace dsn { namespace tools { + +tool_base::tool_base(const char* name) +{ + _name = name; +} + +toollet::toollet(const char* name) + : tool_base(name) +{ +} + +tool_app::tool_app(const char* name) + : tool_base(name) +{ +} + +DEFINE_TASK_CODE(LPC_CONTROL_SERVICE_APP, TASK_PRIORITY_HIGH, THREAD_POOL_DEFAULT) + +class service_control_task : public task +{ +public: + service_control_task(service::service_app* app, bool start) + : _app(app), task(LPC_CONTROL_SERVICE_APP, 0, app->svc_node()), _start(start) + { + } + + void exec() + { + if (_start) + { + auto err = _app->start(_app->arg_count(), _app->args()); + dassert (err == ERR_SUCCESS, "start app failed, err = %s", err.to_string()); + } + else + _app->stop(); + } + +private: + service::service_app* _app; + bool _start; // false for stop +}; + +void tool_app::start_all_service_apps() +{ + auto apps = service::system::get_all_apps(); + for (auto it = apps.begin(); it != apps.end(); it++) + { + task_ptr t(new service_control_task(it->second, true)); + t->set_delay(1000 * it->second->spec().delay_seconds); + t->enqueue(); + } +} + + +void tool_app::stop_all_service_apps() +{ + auto apps = service::system::get_all_apps(); + for (auto it = apps.begin(); it != apps.end(); it++) + { + task_ptr t(new service_control_task(it->second, false)); + t->enqueue(); + } +} + +const service_spec& tool_app::get_service_spec() +{ + return service_engine::instance().spec(); +} + +configuration_ptr config() +{ + return service_engine::instance().spec().config; +} + +join_point syste_init("system.init"); +join_point syste_exit("system.exit"); // type, error code, context (e.g., exception) + +namespace internal_use_only +{ + bool register_toollet(const char* name, toollet_factory f, int type) + { + return dsn::utils::factory_store::register_factory(name, f, type); + } + + bool register_tool(const char* name, tool_app_factory f, int type) + { + return 
dsn::utils::factory_store::register_factory(name, f, type); + } + + bool register_component_provider(const char* name, task_queue_factory f, int type) + { + return dsn::utils::factory_store::register_factory(name, f, type); + } + + bool register_component_provider(const char* name, task_worker_factory f, int type) + { + return dsn::utils::factory_store::register_factory(name, f, type); + } + + bool register_component_provider(const char* name, admission_controller_factory f, int type) + { + return dsn::utils::factory_store::register_factory(name, f, type); + } + + bool register_component_provider(const char* name, lock_factory f, int type) + { + return dsn::utils::factory_store::register_factory(name, f, type); + } + + bool register_component_provider(const char* name, read_write_lock_factory f, int type) + { + return dsn::utils::factory_store::register_factory(name, f, type); + } + + bool register_component_provider(const char* name, semaphore_factory f, int type) + { + return dsn::utils::factory_store::register_factory(name, f, type); + } + + bool register_component_provider(const char* name, network_factory f, int type) + { + return dsn::utils::factory_store::register_factory(name, f, type); + } + + bool register_component_provider(const char* name, aio_factory f, int type) + { + return dsn::utils::factory_store::register_factory(name, f, type); + } + + bool register_component_provider(const char* name, env_factory f, int type) + { + return dsn::utils::factory_store::register_factory(name, f, type); + } + + bool register_component_provider(const char* name, perf_counter_factory f, int type) + { + return dsn::utils::factory_store::register_factory(name, f, type); + } + + bool register_component_provider(const char* name, nfs_factory f, int type) + { + return dsn::utils::factory_store::register_factory(name, f, type); + } + + bool register_component_provider(const char* name, logging_factory f, int type) + { + return 
dsn::utils::factory_store::register_factory(name, f, type); + } + + bool register_component_provider(const char* name, message_parser_factory f, int type) + { + return dsn::utils::factory_store::register_factory(name, f, type); + } + + toollet* get_toollet(const char* name, int type) + { + toollet* tlt = nullptr; + if (utils::singleton_store::instance().get(name, tlt)) + return tlt; + else + { + tlt = utils::factory_store::create(name, type, name); + utils::singleton_store::instance().put(name, tlt); + return tlt; + } + } + + configuration_ptr config() + { + return ::dsn::service::system::config(); + } +} +}} // end namespace dsn::tool_api diff --git a/src/core/utils.cpp b/src/core/utils.cpp new file mode 100644 index 0000000000..74d7c405d0 --- /dev/null +++ b/src/core/utils.cpp @@ -0,0 +1,469 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# include +# include +# include +# include + +# define __TITLE__ "dsn.utils" + +namespace dsn { + namespace utils { + + void split_args(const char* args, __out_param std::vector& sargs, char splitter) + { + sargs.clear(); + + std::string v(args); + + int lastPos = 0; + while (true) + { + auto pos = v.find(splitter, lastPos); + if (pos != std::string::npos) + { + std::string s = v.substr(lastPos, pos - lastPos); + if (s.length() > 0) + { + std::string s2 = trim_string((char*)s.c_str()); + if (s2.length() > 0) + sargs.push_back(s2); + } + lastPos = static_cast(pos + 1); + } + else + { + std::string s = v.substr(lastPos); + if (s.length() > 0) + { + std::string s2 = trim_string((char*)s.c_str()); + if (s2.length() > 0) + sargs.push_back(s2); + } + break; + } + } + } + void split_args(const char* args, __out_param std::list& sargs, char splitter) + { + sargs.clear(); + + std::string v(args); + + int lastPos = 0; + while (true) + { + auto pos = v.find(splitter, lastPos); + if (pos != std::string::npos) + { + std::string s = v.substr(lastPos, pos - lastPos); + if (s.length() > 0) + { + std::string s2 = trim_string((char*)s.c_str()); + if (s2.length() > 0) + sargs.push_back(s2); + } + lastPos = static_cast(pos + 1); + } + else + { + std::string s = v.substr(lastPos); + if (s.length() > 0) + { + std::string s2 = trim_string((char*)s.c_str()); + if (s2.length() > 0) + sargs.push_back(s2); + } + break; + } + } + } + + char* trim_string(char* s) + { + while (*s != '\0' && (*s == ' ' || *s == '\t')) { s++; } + char* r = s; + s += strlen(s); + while (s >= r && (*s == '\0' || *s == ' ' || *s == '\t' || *s == '\r' || *s == '\n')) { *s = '\0'; s--; } + return r; + } + + class random64_generator : public 
singleton + { + public: + random64_generator() + : _rng(std::random_device()()) + { + } + + uint64_t next() + { + return _dist(_rng); + } + + private: + std::default_random_engine _rng; + std::uniform_int_distribution _dist; + }; + + + uint64_t get_random64() + { + return random64_generator::instance().next(); + } + + uint64_t get_random64_pseudo() + { + uint64_t v = ((uint64_t)std::rand()); + v *= ((uint64_t)std::rand()); + v *= ((uint64_t)std::rand()); + v *= ((uint64_t)std::rand()); + v *= ((uint64_t)std::rand()); + v ^= ((uint64_t)std::rand()); + return v; + } + + + uint64_t get_current_physical_time_ns() + { + return env_provider::get_current_physical_time_ns(); + } + + void time_ms_to_string(uint64_t ts_ms, char* str) + { + auto hr = static_cast(ts_ms / (60ULL * 60ULL * 1000ULL) % 24); + auto min = static_cast(ts_ms / (60ULL * 1000ULL) % 60); + auto sc = static_cast(ts_ms / (1000ULL) % 60); + auto ms = static_cast(ts_ms % 1000); + + sprintf(str, "%02u:%02u:%02u.%03u", hr, min, sc, ms); + } + } +} + +namespace dsn +{ + + binary_reader::binary_reader(blob& blob) + { + _blob = blob; + _size = blob.length(); + _ptr = blob.data(); + _remaining_size = _size; + } + + int binary_reader::read(__out_param std::string& s) + { + int len; + if (0 == read(len)) + return 0; + + s.resize(len, 0); + + if (len > 0) + { + int x = read((char*)&s[0], len); + return x == 0 ? 
x : (x + sizeof(len)); + } + else + { + return static_cast(sizeof(len)); + } + } + + int binary_reader::read(blob& blob) + { + int len; + if (0 == read(len)) + return 0; + + if (len >= 0 && len <= get_remaining_size()) /* len comes from the serialized data; a negative value must not move the cursor backwards */ + { + blob = _blob.range(static_cast(_ptr - _blob.data()), len); + _ptr += len; + _remaining_size -= len; + return len + sizeof(len); + } + else + { + dwarn("read beyond the end of buffer"); + return 0; + } + } + + int binary_reader::read(char* buffer, int sz) + { + if (sz <= get_remaining_size()) + { + memcpy((void*)buffer, _ptr, sz); + _ptr += sz; + _remaining_size -= sz; + return sz; + } + else + { + dwarn("read beyond the end of buffer"); + return 0; + } + } + + bool binary_reader::next(const void** data, int* size) + { + if (get_remaining_size() > 0) + { + *data = (const void*)_ptr; + *size = _remaining_size; + + _ptr += _remaining_size; + _remaining_size = 0; + return true; + } + else + return false; + } + + bool binary_reader::backup(int count) + { + if (count <= static_cast(_ptr - _blob.data())) + { + _ptr -= count; + _remaining_size += count; + return true; + } + else + return false; + } + + bool binary_reader::skip(int count) + { + if (count <= get_remaining_size()) + { + _ptr += count; + _remaining_size -= count; + return true; + } + else + { + dwarn("read beyond the end of buffer"); + return false; + } + } + + int binary_writer::_reserved_size_per_buffer_static = 256; + + binary_writer::binary_writer(int reserveBufferSize) + { + _total_size = 0; + + _buffers.reserve(1); + _data.reserve(1); + + _cur_pos = -1; + _cur_is_placeholder = false; + + _reserved_size_per_buffer = (reserveBufferSize == 0) ? 
_reserved_size_per_buffer_static : reserveBufferSize; + + create_buffer_and_writer(); + } + + binary_writer::binary_writer(blob& buffer) + { + _total_size = 0; + + _buffers.reserve(1); + _data.reserve(1); + + _cur_pos = -1; + _cur_is_placeholder = false; + + _reserved_size_per_buffer = _reserved_size_per_buffer_static; + + create_buffer_and_writer(&buffer); + } + + binary_writer::~binary_writer() + { + } + + void binary_writer::create_buffer_and_writer(blob* pBuffer) + { + if (pBuffer == nullptr) + { + std::shared_ptr ptr((char*)malloc(_reserved_size_per_buffer), free); /* malloc'ed storage must be released with free(), not shared_ptr's default delete */ + blob bb(ptr, _reserved_size_per_buffer); + _buffers.push_back(bb); + + bb._length = 0; + _data.push_back(bb); + } + else + { + _buffers.push_back(*pBuffer); + + pBuffer->_length = 0; + _data.push_back(*pBuffer); + } + + ++_cur_pos; + } + + uint16_t binary_writer::write_placeholder() + { + if (_cur_is_placeholder) + { + create_buffer_and_writer(); + } + _cur_is_placeholder = true; + return (uint16_t)_cur_pos; + } + + blob binary_writer::get_buffer() const + { + if (_data.size() == 1) + { + return _data[0]; + } + else + { + std::shared_ptr bptr((char*)malloc(_total_size), free); /* free() deleter matches malloc */ + blob bb(bptr, _total_size); + const char* ptr = bb.data(); + + for (int i = 0; i < static_cast(_data.size()); i++) + { + memcpy((void*)ptr, (const void*)_data[i].data(), (size_t)_data[i].length()); + ptr += _data[i].length(); + } + return bb; + } + } + + void binary_writer::write(const char* buffer, int sz, uint16_t pos /*= 0xffff*/) + { + int sz0 = sz; + + if (pos != 0xffff) + { + int rem_size = _buffers[pos].length() - _data[pos].length(); + if (sz > rem_size) + { + int allocSize = _data[pos].length() + sz; + std::shared_ptr ptr((char*)malloc(allocSize), free); /* free() deleter matches malloc */ + blob bb(ptr, allocSize); + + memcpy((void*)bb.data(), (const void*)_data[pos].data(), (size_t)_data[pos].length()); + memcpy((void*)(bb.data() + _data[pos].length()), (const void*)buffer, (size_t)sz); + + _buffers[pos] = bb; + _data[pos] = bb; + } + else + { + 
memcpy((void*)(_data[pos].data() + _data[pos].length()), buffer, (size_t)sz); + _data[pos]._length += sz; + } + } + else + { + if (_cur_is_placeholder) + { + create_buffer_and_writer(); + _cur_is_placeholder = false; + } + + pos = (uint16_t)_cur_pos; + + int rem_size = _buffers[pos].length() - _data[pos].length(); + if (rem_size >= sz) + { + memcpy((void*)(_data[pos].data() + _data[pos].length()), buffer, (size_t)sz); + _data[pos]._length += sz; + } + else + { + memcpy((void*)(_data[pos].data() + _data[pos].length()), buffer, (size_t)rem_size); + _data[pos]._length += rem_size; + + sz -= rem_size; + buffer += rem_size; + + int allocSize = _reserved_size_per_buffer; + if (sz > allocSize) + allocSize = sz; + + std::shared_ptr ptr((char*)malloc(allocSize), free); /* malloc'ed storage must be released with free(), not shared_ptr's default delete */ + blob bb(ptr, allocSize); + _buffers.push_back(bb); + + bb._length = 0; + _data.push_back(bb); + + pos = (uint16_t)(++_cur_pos); + + memcpy((void*)(_data[pos].data() + _data[pos].length()), buffer, (size_t)sz); + _data[pos]._length += sz; + } + } + + _total_size += sz0; + } + + bool binary_writer::next(void** data, int* size) + { + int sz = _buffers[_cur_pos].length() - _data[_cur_pos].length(); + if (sz == 0) + { + std::shared_ptr ptr((char*)malloc(_reserved_size_per_buffer), free); /* free() deleter matches malloc */ + blob bb(ptr, _reserved_size_per_buffer); + _buffers.push_back(bb); + + bb._length = 0; + _data.push_back(bb); + ++_cur_pos; + + sz = _reserved_size_per_buffer; + } + + *size = sz; + *data = (void*)(_data[_cur_pos].data() + _data[_cur_pos].length()); + _data[_cur_pos]._length += sz; + _total_size += sz; + return true; + } + + bool binary_writer::backup(int count) + { + dassert(count <= _data[_cur_pos].length(), "currently we don't support backup before the last buffer's header"); + _data[_cur_pos]._length -= count; + _total_size -= count; + return true; + } +} // end namespace dsn + + + + + diff --git a/src/core/zlocks.cpp b/src/core/zlocks.cpp new file mode 100644 index 0000000000..1367270bf9 --- /dev/null +++ b/src/core/zlocks.cpp @@ 
-0,0 +1,201 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +# include +# include +# include +# include "service_engine.h" + +# define __TITLE__ "lock" + +using namespace dsn::utils; + +namespace dsn { namespace service { + + namespace lock_checker + { + __thread int zlock_exclusive_count = 0; + __thread int zlock_shared_count = 0; + + void check_wait_safety() + { + if (zlock_exclusive_count + zlock_shared_count > 0) + { + dassert (false, "wait inside locks may lead to deadlocks - current thread owns %u exclusive locks and %u shared locks now.", + zlock_exclusive_count, zlock_shared_count + ); + } + } + + void check_dangling_lock() + { + if (zlock_exclusive_count + zlock_shared_count > 0) + { + dassert (false, "locks should not be hold at this point - current thread owns %u exclusive locks and %u shared locks now.", + zlock_exclusive_count, zlock_shared_count + ); + } + } + + void check_wait_task(task* waitee) + { + check_wait_safety(); + + if (nullptr != task::get_current_task() && !waitee->is_empty()) + { + if (TASK_TYPE_RPC_RESPONSE == waitee->spec().type || + task::get_current_task()->spec().pool_code == waitee->spec().pool_code) + { + dassert(false, "task %s waits for another task %s sharing the same thread pool - will lead to deadlocks easily (e.g., when worker_count = 1 or when the pool is partitioned)", + task::get_current_task()->spec().code.to_string(), + waitee->spec().code.to_string() + ); + } + } + } + } + +zlock::zlock(void) +{ + lock_provider* last = factory_store::create(service_engine::instance().spec().lock_factory_name.c_str(), PROVIDER_TYPE_MAIN, this, nullptr); + + // TODO: perf opt by saving the func ptrs somewhere + for (auto it = service_engine::instance().spec().lock_aspects.begin(); + it != service_engine::instance().spec().lock_aspects.end(); + it++) + { + last = factory_store::create(it->c_str(), PROVIDER_TYPE_ASPECT, this, last); + } + + _provider = last; +} + +zlock::~zlock(void) +{ + delete _provider; +} + + +zrwlock::zrwlock(void) +{ + rwlock_provider* last = 
factory_store::create(service_engine::instance().spec().rwlock_factory_name.c_str(), PROVIDER_TYPE_MAIN, this, nullptr); + + // TODO: perf opt by saving the func ptrs somewhere + for (auto it = service_engine::instance().spec().rwlock_aspects.begin(); + it != service_engine::instance().spec().rwlock_aspects.end(); + it++) + { + last = factory_store::create(it->c_str(), PROVIDER_TYPE_ASPECT, this, last); + } + + _provider = last; +} + +zrwlock::~zrwlock(void) +{ + delete _provider; +} + +zsemaphore::zsemaphore(int initialCount) +{ + semaphore_provider* last = factory_store::create(service_engine::instance().spec().semaphore_factory_name.c_str(), PROVIDER_TYPE_MAIN, this, initialCount, nullptr); + + // TODO: perf opt by saving the func ptrs somewhere + for (auto it = service_engine::instance().spec().semaphore_aspects.begin(); + it != service_engine::instance().spec().semaphore_aspects.end(); + it++) + { + last = factory_store::create(it->c_str(), PROVIDER_TYPE_ASPECT, this, initialCount, last); + } + + _provider = last; +} + +zsemaphore::~zsemaphore() +{ + delete _provider; +} + +//------------------------------- event ---------------------------------- + +zevent::zevent(bool manualReset, bool initState/* = false*/) +{ + _manualReset = manualReset; + _signaled = initState; + if (_signaled) + { + _sema.signal(); + } +} + +zevent::~zevent() +{ +} + +void zevent::set() +{ + bool nonsignaled = false; + if (std::atomic_compare_exchange_strong(&_signaled, &nonsignaled, true)) + { + _sema.signal(); + } +} + +void zevent::reset() +{ + if (_manualReset) + { + bool signaled = true; + if (std::atomic_compare_exchange_strong(&_signaled, &signaled, false)) + { + } + } +} + +bool zevent::wait(int timeout_milliseconds) +{ + lock_checker::check_wait_safety(); + + if (_manualReset) + { + if (std::atomic_load(&_signaled)) + return true; + + _sema.wait(timeout_milliseconds); + return std::atomic_load(&_signaled); + } + + else + { + bool signaled = true; + if 
(std::atomic_compare_exchange_strong(&_signaled, &signaled, false)) + return true; + + _sema.wait(timeout_milliseconds); + return std::atomic_compare_exchange_strong(&_signaled, &signaled, false); + } +} + +}} // end namespace dsn::service diff --git a/src/dev/CMakeLists.txt b/src/dev/CMakeLists.txt new file mode 100644 index 0000000000..c8d56bc735 --- /dev/null +++ b/src/dev/CMakeLists.txt @@ -0,0 +1 @@ +dsn_add_library(dsn.dev) diff --git a/src/dev/service.api.oo.impl.cpp b/src/dev/service.api.oo.impl.cpp new file mode 100644 index 0000000000..9c64030985 --- /dev/null +++ b/src/dev/service.api.oo.impl.cpp @@ -0,0 +1,172 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +# include + +namespace dsn { + namespace service + { + namespace tasking + { + class service_task : public task, public service_context_manager + { + public: + service_task(task_code code, servicelet* svc, task_handler& handler, int hash = 0) + : task(code, hash), service_context_manager(svc, this) + { + _handler = handler; + } + + virtual void exec() + { + if (nullptr != _handler) + { + _handler(); + _handler = nullptr; + } + } + + //task_handler& handler() { return _handler; } + + private: + task_handler _handler; + }; + + class service_timer_task : public timer_task, public service_context_manager + { + public: + service_timer_task(task_code code, servicelet* svc, task_handler& handler, uint32_t intervalMilliseconds, int hash = 0) + : timer_task(code, intervalMilliseconds, hash), service_context_manager(svc, this) + { + _handler = handler; + } + + virtual bool on_timer() { _handler(); return true; } + + private: + task_handler _handler; + }; + + task_ptr enqueue( + task_code evt, + servicelet *context, + task_handler callback, + int hash /*= 0*/, + int delay_milliseconds /*= 0*/, + int timer_interval_milliseconds /*= 0*/ + ) + { + task_ptr tsk; + if (timer_interval_milliseconds != 0) + tsk.reset(new service_timer_task(evt, context, callback, timer_interval_milliseconds, hash)); + else + tsk.reset(new service_task(evt, context, callback, hash)); + + enqueue(tsk, delay_milliseconds); + return tsk; + } + } + + namespace rpc + { + + rpc_response_task_ptr call( + const end_point& server, + message_ptr& request, + servicelet* owner, + std::function callback, + int reply_hash /*= 0*/ + ) + { + rpc_response_task_ptr resp_task(new internal_use_only::service_rpc_response_task4( + owner, + callback, + request, + reply_hash + )); + + return rpc::call(server, request, resp_task); + } + } + + namespace file + { + aio_task_ptr read( + handle_t hFile, + char* buffer, + int count, + uint64_t offset, + task_code callback_code, + servicelet* owner, + aio_handler callback, 
+ int hash /*= 0*/ + ) + { + aio_task_ptr tsk(new internal_use_only::service_aio_task(callback_code, owner, callback, hash)); + read(hFile, buffer, count, offset, tsk); + return tsk; + } + + aio_task_ptr write( + handle_t hFile, + const char* buffer, + int count, + uint64_t offset, + task_code callback_code, + servicelet* owner, + aio_handler callback, + int hash /*= 0*/ + ) + { + aio_task_ptr tsk(new internal_use_only::service_aio_task(callback_code, owner, callback, hash)); + write(hFile, buffer, count, offset, tsk); + return tsk; + } + + + aio_task_ptr copy_remote_files( + const end_point& remote, + std::string& source_dir, + std::vector& files, // empty for all + std::string& dest_dir, + bool overwrite, + task_code callback_code, + servicelet* owner, + aio_handler callback, + int hash /*= 0*/ + ) + { + aio_task_ptr tsk(new internal_use_only::service_aio_task(callback_code, owner, callback, hash)); + copy_remote_files(remote, source_dir, files, dest_dir, overwrite, tsk); + return tsk; + } + } + + } // end namespace service +} // end namespace + + + diff --git a/src/dev/service.cpp b/src/dev/service.cpp new file mode 100644 index 0000000000..bc413768f5 --- /dev/null +++ b/src/dev/service.cpp @@ -0,0 +1,113 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +# include +# include +# include + +namespace dsn { +namespace service { + +class service_objects : public ::dsn::utils::singleton +{ +public: + void add(servicelet* obj) + { + std::lock_guard l(_lock); + _services.insert(obj); + } + + void remove(servicelet* obj) + { + std::lock_guard l(_lock); + _services.erase(obj); + } + +private: + std::mutex _lock; + std::set _services; +}; + +static service_objects* s_services = &(service_objects::instance()); + +servicelet::servicelet() +{ + _access_thread_id_inited = false; + _last_id = 0; + service_objects::instance().add(this); +} + +servicelet::~servicelet() +{ + clear_outstanding_tasks(); + service_objects::instance().remove(this); +} + +int servicelet::add_outstanding_task(task* tsk) +{ + std::lock_guard l(_outstanding_tasks_lock); + int id = ++_last_id; + _outstanding_tasks.insert(std::map::value_type(id, tsk)); + return id; +} + +void servicelet::remove_outstanding_task(int id) +{ + std::lock_guard l(_outstanding_tasks_lock); + auto pr = _outstanding_tasks.erase(id); + dassert (pr == 1, "task with local id %d is not found in the hash table", id); +} + +void servicelet::clear_outstanding_tasks() +{ + std::lock_guard l(_outstanding_tasks_lock); + for (auto it = _outstanding_tasks.begin(); it != _outstanding_tasks.end(); it++) + { + it->second->cancel(true); + + auto sc = dynamic_cast(it->second); + if (nullptr != sc) + { + sc->clear_context(); + } + } + _outstanding_tasks.clear(); +} + +void servicelet::check_hashed_access() +{ 
+ if (_access_thread_id_inited) + { + dassert (std::this_thread::get_id() == _access_thread_id, "the service is assumed to be accessed by one thread only!"); + } + else + { + _access_thread_id = std::this_thread::get_id(); + _access_thread_id_inited = true; + } +} + +}} // end namespace dsn::service diff --git a/src/dist/CMakeLists.txt b/src/dist/CMakeLists.txt new file mode 100644 index 0000000000..650439b54b --- /dev/null +++ b/src/dist/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(failure_detector) diff --git a/src/dist/failure_detector/CMakeLists.txt b/src/dist/failure_detector/CMakeLists.txt new file mode 100644 index 0000000000..5283cb9d7e --- /dev/null +++ b/src/dist/failure_detector/CMakeLists.txt @@ -0,0 +1 @@ +dsn_add_library(dsn.failure_detector) diff --git a/src/dist/failure_detector/failure_detector.cpp b/src/dist/failure_detector/failure_detector.cpp new file mode 100644 index 0000000000..d4a3530ad0 --- /dev/null +++ b/src/dist/failure_detector/failure_detector.cpp @@ -0,0 +1,483 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +# include +# include +# include +# include + +#define __TITLE__ "failure_detector" + +using namespace ::dsn::service; + +namespace dsn { +namespace fd { + +failure_detector::failure_detector() +{ + auto pool = task_spec::get(LPC_BEACON_CHECK)->pool_code; + task_spec::get(RPC_FD_FAILURE_DETECTOR_PING)->pool_code = pool; + task_spec::get(RPC_FD_FAILURE_DETECTOR_PING_ACK)->pool_code = pool; +} + +int failure_detector::start( + uint32_t check_interval_seconds, + uint32_t beacon_interval_seconds, + uint32_t lease_seconds, + uint32_t grace_seconds, + bool use_allow_list) +{ + _check_interval_milliseconds = check_interval_seconds * 1000; + _beacon_interval_milliseconds = beacon_interval_seconds * 1000; + _lease_milliseconds = lease_seconds * 1000; + _grace_milliseconds = grace_seconds * 1000; + + _use_allow_list = use_allow_list; + + open_service(); + + // start periodically check job + _current_task = tasking::enqueue(LPC_BEACON_CHECK, this, &failure_detector::process_all_records, -1, _check_interval_milliseconds, _check_interval_milliseconds); + + _is_started = true; + return ERR_SUCCESS; +} + +int failure_detector::stop() +{ + if ( _is_started == false ) + { + return ERR_SUCCESS; + } + + _is_started = false; + + close_service(); + + if (_current_task != nullptr) + { + _current_task->cancel(true); + _current_task = nullptr; + } + + return ERR_SUCCESS; +} + +void failure_detector::register_master(const end_point& target) +{ + uint64_t now = now_ms(); + + zauto_lock l(_lock); + + master_record record(target, now, now + _beacon_interval_milliseconds); + + auto ret = _masters.insert(std::make_pair(target, record)); + if (ret.second) + { + dinfo( + "register_rpc_handler master successfully, target 
machine ip [%u], port[%u]", + target.ip, static_cast(target.port)); + } + else + { + // active the beacon again in case previously local node is not in target's allow list + ret.first->second.rejected = false; + dinfo( + "master already registered, for target machine: target machine ip [%u], port[%u]", + target.ip, static_cast(target.port)); + } + + send_beacon(target, now_ms()); +} + +bool failure_detector::switch_master(const end_point& from, const end_point& to) +{ + { + zauto_lock l(_lock); + + auto it = _masters.find(from); + auto it2 = _masters.find(to); + if (it != _masters.end()) + { + if (it2 != _masters.end()) + { + dinfo( + "master switch, switch master from %s:%d to %s:%d failed as both are already registered", + from.name.c_str(), static_cast(from.port), + to.name.c_str(), static_cast(to.port) + ); + return false; + } + + it->second.node = to; + it->second.rejected = false; + _masters.insert(std::make_pair(to, it->second)); + _masters.erase(from); + + dinfo( + "master switch, switch master from %s:%d to %s:%d succeeded", + from.name.c_str(), static_cast(from.port), + to.name.c_str(), static_cast(to.port) + ); + } + else + { + dinfo( + "master switch, switch master from %s:%d to %s:%d failed as the former has not been registered yet", + from.name.c_str(), static_cast(from.port), + to.name.c_str(), static_cast(to.port) + ); + return false; + } + } + + send_beacon(to, now_ms()); + return true; +} + +bool failure_detector::is_time_greater_than(uint64_t ts, uint64_t base) +{ + uint64_t delta = ts - base; + if (delta <= 24ULL*3600ULL*1000ULL) + return true; + else + return false; +} + +void failure_detector::report(const end_point& node, bool is_master, bool is_connected) +{ + ddebug("%s %s:%hu %sconnected", is_master ? "master":"worker", node.name.c_str(), node.port, is_connected ? "" : "dis"); + + printf ("%s %s:%hu %sconnected\n", is_master ? "master":"worker", node.name.c_str(), node.port, is_connected ? 
"" : "dis"); +} + +/* + |--- lease period ----|lease IsExpired, commit suicide + |--- lease period ---| + worker: ----------------------------------------------------------------> + \ / \ / _\ + beacon ack beacon ack x (beacon deliver failed) + _\/ _\/ + master: ----------------------------------------------------------------> + |---- grace period ----| + |--- grace period ----| grace IsExpired, declare worker dead +*/ + +void failure_detector::process_all_records() +{ + if (!_is_started) + { + return; + } + + + zauto_lock l(_lock); + + std::vector expire; + uint64_t now =now_ms(); + + master_map::iterator itr = _masters.begin(); + for (; itr != _masters.end() ; itr++) + { + master_record& record = itr->second; + if (is_time_greater_than(now, record.next_beacon_time)) + { + if (!record.rejected || random32(0, 40) <= 10) + { + record.next_beacon_time = now + _beacon_interval_milliseconds; + send_beacon(record.node, now); + } + } + + if (record.is_alive + && now - record.last_send_time_for_beacon_with_ack >= _lease_milliseconds) + { + expire.push_back(record.node); + record.is_alive = false; + + report(record.node, true, false); + } + } + + if ( expire.size() > 0 ) + { + on_master_disconnected(expire); + } + + // process recv record, for server + expire.clear(); + now =now_ms(); + + worker_map::iterator itq = _workers.begin(); + for ( ; itq != _workers.end() ; itq++) + { + worker_record& record = itq->second; + + if (record.is_alive != false + && now - record.last_beacon_recv_time > _grace_milliseconds) + { + expire.push_back(record.node); + record.is_alive = false; + + report(record.node, false, false); + } + } + + if ( expire.size() > 0 ) + { + on_worker_disconnected(expire); + } +} + +void failure_detector::add_allow_list( const end_point& node) +{ + zauto_lock l(_lock); + _allow_list.insert(node); +} + +bool failure_detector::remove_from_allow_list( const end_point& node) +{ + zauto_lock l(_lock); + return _allow_list.erase(node) > 0; +} + +void 
failure_detector::on_ping_internal(const beacon_msg& beacon, __out_param beacon_ack& ack) +{ + ack.is_master = true; + ack.this_node = beacon.to; + ack.primary_node = primary_address(); + ack.time = beacon.time; + ack.allowed = true; + + zauto_lock l(_lock); + + uint64_t now = now_ms(); + auto node = beacon.from; + + worker_map::iterator itr = _workers.find(node); + if (itr == _workers.end()) + { + if (_use_allow_list && _allow_list.find(node) == _allow_list.end()) + { + ddebug("Client %s:%hu is rejected", node.name.c_str(), node.port); + ack.allowed = false; + return; + } + + // create new entry for node + worker_record record(node, now); + _workers.insert(std::make_pair(node, record)); + + itr = _workers.find(node); + dassert(itr != _workers.end(), "cannot find the worker"); + + itr->second.is_alive = true; + + report(node, false, true); + on_worker_connected(node); + } + else if (is_time_greater_than(now, itr->second.last_beacon_recv_time)) + { + itr->second.last_beacon_recv_time = now; + + if (itr->second.is_alive == false) + { + itr->second.is_alive = true; + + report(node, false, true); + on_worker_connected(node); + } + } +} + +void failure_detector::on_ping(const beacon_msg& beacon, ::dsn::service::rpc_replier& reply) +{ + beacon_ack ack; + on_ping_internal(beacon, ack); + reply(ack); +} + +void failure_detector::end_ping(::dsn::error_code err, const beacon_ack& ack, void* context) +{ + if (err) return; + + uint64_t beacon_send_time = ack.time; + auto node = ack.this_node; + + zauto_lock l(_lock); + + uint64_t now = now_ms(); + + master_map::iterator itr = _masters.find(node); + + if ( itr == _masters.end() ) + { + dwarn("Failure in process beacon ack in liveness monitor, received beacon ack without corresponding beacon record, remote node name[%s], local node name[%s]", + node.name.c_str(), primary_address().name.c_str()); + + return; + } + + master_record& record = itr->second; + if (!ack.allowed) + { + ddebug( "Server %s:%hu rejected me as i'm not in its 
allow list, stop sending beacon message", node.name.c_str(), node.port); + record.rejected = true; + return; + } + + if (is_time_greater_than(beacon_send_time, record.last_send_time_for_beacon_with_ack)) + { + record.last_send_time_for_beacon_with_ack = beacon_send_time; + record.rejected = false; + } + else + { + return; + } + + if (record.is_alive == false + && now - record.last_send_time_for_beacon_with_ack <= _lease_milliseconds) + { + report(node, true, true); + itr->second.is_alive = true; + on_master_connected(node); + } +} + +bool failure_detector::unregister_master(const end_point & node) +{ + zauto_lock l(_lock); + + bool ret; + + size_t count = _masters.erase(node); + + if ( count == 0 ) + { + ret = false; + } + else + { + ret = true; + } + + dinfo("remove send record sucessfully, removed node [%s], removed entry count [%u]", + node.name.c_str(), (uint32_t)count); + + return ret; +} + +bool failure_detector::is_master_connected( const end_point& node) const +{ + zauto_lock l(_lock); + auto it = _masters.find(node); + if (it != _masters.end()) + return it->second.is_alive; + else + return false; +} + +void failure_detector::register_worker( const end_point& target, bool is_connected) +{ + uint64_t now = now_ms(); + + zauto_lock l(_lock); + + worker_record record(target, now); + record.is_alive = is_connected ? 
true : false; + + auto ret = _workers.insert(std::make_pair(target, record)); + if ( ret.second ) + { + dinfo( + "register_rpc_handler worker successfully", "target machine ip [%u], port[%u]", + target.ip, static_cast(target.port)); + } + else + { + dinfo( + "worker already registered", "for target machine: target machine ip [%u], port[%u]", + target.ip, static_cast(target.port)); + } +} + +bool failure_detector::unregister_worker(const end_point& node) +{ + zauto_lock l(_lock); + + bool ret; + + size_t count = _workers.erase(node); + + if ( count == 0 ) + { + ret = false; + } + else + { + ret = true; + } + + dinfo("remove recv record sucessfully, removed node [%s], removed entry count [%u]", + node.name.c_str(), (uint32_t)count); + return ret; +} + +void failure_detector::clear_workers() +{ + zauto_lock l(_lock); + _workers.clear(); +} + +bool failure_detector::is_worker_connected( const end_point& node) const +{ + zauto_lock l(_lock); + auto it = _workers.find(node); + if (it != _workers.end()) + return it->second.is_alive; + else + return false; +} + +void failure_detector::send_beacon(const end_point& target, uint64_t time) +{ + beacon_msg beacon; + beacon.time = time; + beacon.from = primary_address(); + beacon.to = target; + + begin_ping( + beacon, + nullptr, + 0, + static_cast(_check_interval_milliseconds), + 0, + &target + ); +} + +}} // end namespace diff --git a/src/dist/failure_detector/fd.thrift b/src/dist/failure_detector/fd.thrift new file mode 100644 index 0000000000..031a3eea05 --- /dev/null +++ b/src/dist/failure_detector/fd.thrift @@ -0,0 +1,24 @@ +include "../../dsn.thrift" + +namespace cpp dsn.fd + +struct beacon_msg +{ + 1: i64 time; + 2: dsn.end_point from; + 3: dsn.end_point to; +} + +struct beacon_ack +{ + 1: i64 time; + 2: dsn.end_point this_node; + 3: dsn.end_point primary_node; + 4: bool is_master; + 5: bool allowed; +} + +service failure_detector +{ + beacon_ack ping(1:beacon_msg beacon) +} diff --git a/src/dsn.thrift b/src/dsn.thrift 
new file mode 100644 index 0000000000..e354bee6b0 --- /dev/null +++ b/src/dsn.thrift @@ -0,0 +1,13 @@ +namespace cpp dsn + +struct end_point +{ + 1: i32 ip; + 2: i16 port; + 3: string name; +} + +// place holder +struct blob +{ +} diff --git a/src/tools/CMakeLists.txt b/src/tools/CMakeLists.txt new file mode 100644 index 0000000000..7b8332837d --- /dev/null +++ b/src/tools/CMakeLists.txt @@ -0,0 +1,2 @@ +add_subdirectory(common) +add_subdirectory(simulator) diff --git a/src/tools/common/CMakeLists.txt b/src/tools/common/CMakeLists.txt new file mode 100644 index 0000000000..fcc6b86b11 --- /dev/null +++ b/src/tools/common/CMakeLists.txt @@ -0,0 +1 @@ +dsn_add_library(dsn.tools.common) diff --git a/src/tools/common/fault_injector.cpp b/src/tools/common/fault_injector.cpp new file mode 100644 index 0000000000..c0962cb63b --- /dev/null +++ b/src/tools/common/fault_injector.cpp @@ -0,0 +1,232 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include +#include + +#define __TITLE__ "toollet.fault_injector" + +namespace dsn { + namespace tools { + + struct fj_opt + { + bool fault_injection_enabled; + + // io failure + double rpc_request_drop_ratio; + double rpc_response_drop_ratio; + double disk_read_fail_ratio; + double disk_write_fail_ratio; + + // delay + uint32_t rpc_message_delay_ms_min; + uint32_t rpc_message_delay_ms_max; + uint32_t disk_io_delay_ms_min; + uint32_t disk_io_delay_ms_max; + uint32_t execution_extra_delay_us_max; + + //// node crash + //uint32_t node_crash_minutes_min; + //uint32_t node_crash_minutes_max; + }; + + static fj_opt* s_fj_opts = nullptr; + + static void fault_on_task_enqueue(task* caller, task* callee) + { + } + + static void fault_on_task_begin(task* this_) + { + + } + + static void fault_on_task_end(task* this_) + { + fj_opt& opt = s_fj_opts[this_->spec().code]; + if (opt.execution_extra_delay_us_max > 0) + { + auto d = service::env::random32(0, opt.execution_extra_delay_us_max); + std::this_thread::sleep_for(std::chrono::microseconds(d)); + } + } + + static void fault_on_task_cancelled(task* this_) + { + } + + static void fault_on_task_wait_pre(task* caller, task* callee, uint32_t timeout_ms) + { + + } + + static void fault_on_task_wait_post(task* caller, task* callee, bool succ) + { + + } + + static void fault_on_task_cancel_post(task* caller, task* callee, bool succ) + { + + } + + // return true means continue, otherwise early terminate with task::set_error_code + static bool fault_on_aio_call(task* caller, aio_task* callee) + { + switch (callee->aio()->type) + { + case AIO_Read: + if (service::env::probability() < s_fj_opts[callee->spec().code].disk_read_fail_ratio) + { + 
callee->set_error_code(ERR_FILE_OPERATION_FAILED); + return false; + } + break; + case AIO_Write: + if (service::env::probability() < s_fj_opts[callee->spec().code].disk_write_fail_ratio) + { + callee->set_error_code(ERR_FILE_OPERATION_FAILED); + return false; + } + break; + } + + return true; + } + + static void fault_on_aio_enqueue(aio_task* this_) + { + fj_opt& opt = s_fj_opts[this_->spec().code]; + this_->set_delay(service::env::random32(opt.disk_io_delay_ms_min, opt.disk_io_delay_ms_max)); + } + + // return true means continue, otherwise early terminate with task::set_error_code + static bool fault_on_rpc_call(task* caller, message* req, rpc_response_task* callee) + { + fj_opt& opt = s_fj_opts[req->header().local_rpc_code]; + if (service::env::probability() < opt.rpc_request_drop_ratio) + { + return false; + } + else + { + return true; + } + } + + static void fault_on_rpc_request_enqueue(rpc_request_task* callee) + { + fj_opt& opt = s_fj_opts[callee->spec().code]; + callee->set_delay(service::env::random32(opt.rpc_message_delay_ms_min, opt.rpc_message_delay_ms_max)); + } + + // return true means continue, otherwise early terminate with task::set_error_code + static bool fault_on_rpc_reply(task* caller, message* msg) + { + fj_opt& opt = s_fj_opts[msg->header().local_rpc_code]; + if (service::env::probability() < opt.rpc_response_drop_ratio) + { + return false; + } + else + { + return true; + } + } + + static void fault_on_rpc_response_enqueue(rpc_response_task* resp) + { + fj_opt& opt = s_fj_opts[resp->spec().code]; + resp->set_delay(service::env::random32(opt.rpc_message_delay_ms_min, opt.rpc_message_delay_ms_max)); + } + + void fault_injector::install(service_spec& spec) + { + s_fj_opts = new fj_opt[task_code::max_value() + 1]; + + fj_opt default_opt; + default_opt.fault_injection_enabled = config()->get_value("task.default", "fault_injection_enabled", true); + + default_opt.rpc_response_drop_ratio = config()->get_value("task.default", 
"rpc_response_drop_ratio", 0.0001); + default_opt.rpc_request_drop_ratio = config()->get_value("task.default", "rpc_request_drop_ratio", 0.0001); + default_opt.disk_read_fail_ratio = config()->get_value("task.default", "disk_read_fail_ratio", 0.0); + default_opt.disk_write_fail_ratio = config()->get_value("task.default", "disk_write_fail_ratio", 0.0); + + default_opt.rpc_message_delay_ms_min = config()->get_value("task.default", "rpc_message_delay_ms_min", 0); + default_opt.rpc_message_delay_ms_max = config()->get_value("task.default", "rpc_message_delay_ms_max", 1000); + default_opt.disk_io_delay_ms_min = config()->get_value("task.default", "disk_io_delay_ms_min", 1); + default_opt.disk_io_delay_ms_max = config()->get_value("task.default", "disk_io_delay_ms_max", 12); + default_opt.execution_extra_delay_us_max = config()->get_value("task.default", "execution_extra_delay_us_max", 0); + + for (int i = 0; i <= task_code::max_value(); i++) + { + if (i == TASK_CODE_INVALID) + continue; + + std::string section_name = std::string("task.") + std::string(task_code::to_string(i)); + task_spec* spec = task_spec::get(i); + dassert (spec != nullptr, "task_spec cannot be null"); + + fj_opt& lopt = s_fj_opts[i]; + lopt.fault_injection_enabled = config()->get_value(section_name.c_str(), "fault_injection_enabled", default_opt.fault_injection_enabled); + + lopt.rpc_response_drop_ratio = config()->get_value(section_name.c_str(), "rpc_response_drop_ratio", default_opt.rpc_response_drop_ratio); + lopt.rpc_request_drop_ratio = config()->get_value(section_name.c_str(), "rpc_request_drop_ratio", default_opt.rpc_request_drop_ratio); + lopt.disk_read_fail_ratio = config()->get_value(section_name.c_str(), "disk_read_fail_ratio", default_opt.disk_read_fail_ratio); + lopt.disk_write_fail_ratio = config()->get_value(section_name.c_str(), "disk_write_fail_ratio", default_opt.disk_write_fail_ratio); + + lopt.rpc_message_delay_ms_min = config()->get_value(section_name.c_str(), 
"rpc_message_delay_ms_min", default_opt.rpc_message_delay_ms_min); + lopt.rpc_message_delay_ms_max = config()->get_value(section_name.c_str(), "rpc_message_delay_ms_max", default_opt.rpc_message_delay_ms_max); + lopt.disk_io_delay_ms_min = config()->get_value(section_name.c_str(), "disk_io_delay_ms_min", default_opt.disk_io_delay_ms_min); + lopt.disk_io_delay_ms_max = config()->get_value(section_name.c_str(), "disk_io_delay_ms_max", default_opt.disk_io_delay_ms_max); + lopt.execution_extra_delay_us_max = config()->get_value(section_name.c_str(), "execution_extra_delay_us_max", default_opt.execution_extra_delay_us_max); + + if (!lopt.fault_injection_enabled) + continue; + + //spec->on_task_enqueue.put_back(fault_on_task_enqueue, "fault_injector"); + //spec->on_task_begin.put_back(fault_on_task_begin, "fault_injector"); + spec->on_task_end.put_back(fault_on_task_end, "fault_injector"); + //spec->on_task_cancelled.put_back(fault_on_task_cancelled, "fault_injector"); + //spec->on_task_wait_pre.put_back(fault_on_task_wait_pre, "fault_injector"); + //spec->on_task_wait_post.put_back(fault_on_task_wait_post, "fault_injector"); + //spec->on_task_cancel_post.put_back(fault_on_task_cancel_post, "fault_injector"); + spec->on_aio_call.put_native(fault_on_aio_call); + spec->on_aio_enqueue.put_back(fault_on_aio_enqueue, "fault_injector"); + spec->on_rpc_call.put_native(fault_on_rpc_call); + spec->on_rpc_request_enqueue.put_back(fault_on_rpc_request_enqueue, "fault_injector"); + spec->on_rpc_reply.put_native(fault_on_rpc_reply); + spec->on_rpc_response_enqueue.put_back(fault_on_rpc_response_enqueue, "fault_injector"); + } + } + + fault_injector::fault_injector(const char* name) + : toollet(name) + { + } + } +} diff --git a/src/tools/common/lockp.std.cpp b/src/tools/common/lockp.std.cpp new file mode 100644 index 0000000000..e908bc7fda --- /dev/null +++ b/src/tools/common/lockp.std.cpp @@ -0,0 +1,27 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation 
+ * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include "lockp.std.h" + diff --git a/src/tools/common/lockp.std.h b/src/tools/common/lockp.std.h new file mode 100644 index 0000000000..47bf1d27b1 --- /dev/null +++ b/src/tools/common/lockp.std.h @@ -0,0 +1,79 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#pragma once + +#include +#include + +namespace dsn { namespace tools { + +class std_lock_provider : public lock_provider +{ +public: + std_lock_provider(dsn::service::zlock *lock, lock_provider* inner_provider) : lock_provider(lock, inner_provider) {} + + virtual void lock() { _lock.lock(); } + virtual bool try_lock() { return _lock.try_lock(); } + virtual void unlock() { _lock.unlock(); } + +private: + std::recursive_mutex _lock; +}; + +class std_rwlock_provider : public rwlock_provider +{ +public: + std_rwlock_provider(dsn::service::zrwlock *lock, rwlock_provider* inner_provider) : rwlock_provider(lock, inner_provider) {} + + virtual void lock_read() { _lock.lock_read(); } + virtual bool try_lock_read() { return _lock.try_lock_read(); } + virtual void unlock_read() { _lock.unlock_read(); } + + virtual void lock_write() { _lock.lock_write(); } + virtual bool try_lock_write() { return _lock.try_lock_write(); } + virtual void unlock_write() { _lock.unlock_write(); } + +private: + utils::rw_lock _lock; +}; + +class std_semaphore_provider : public semaphore_provider +{ +public: + std_semaphore_provider(dsn::service::zsemaphore *sema, int initialCount, semaphore_provider *inner_provider) + : semaphore_provider(sema, initialCount, inner_provider), _sema(initialCount) + { + } + +public: + virtual void signal(int count) { _sema.signal(count); } + virtual bool wait(int timeout_milliseconds) { return _sema.wait(timeout_milliseconds); } + +private: + dsn::utils::semaphore _sema; +}; + +}} // end namespace dsn::tools diff --git a/src/tools/common/native_aio_provider.posix.cpp b/src/tools/common/native_aio_provider.posix.cpp new file mode 100644 index 0000000000..aab0741d5d --- /dev/null +++ b/src/tools/common/native_aio_provider.posix.cpp @@ -0,0 +1,170 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a 
copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "native_aio_provider.posix.h" + +#if defined(__MACH__) || defined(__linux__) + +#include +#include +#include + +#define __TITLE__ "aio.provider.posix" + +namespace dsn { + namespace tools { + + native_posix_aio_provider::native_posix_aio_provider(disk_engine* disk, aio_provider* inner_provider) + : aio_provider(disk, inner_provider) + { + } + + native_posix_aio_provider::~native_posix_aio_provider() + { + } + + handle_t native_posix_aio_provider::open(const char* file_name, int flag, int pmode) + { + return (handle_t)::open(file_name, flag, pmode); + } + + error_code native_posix_aio_provider::close(handle_t hFile) + { + // TODO: handle failure + ::close(static_cast(hFile)); + return ERR_SUCCESS; + } + + struct posix_disk_aio_context : public disk_aio + { + struct aiocb cb; + aio_task* tsk; + native_posix_aio_provider* this_; + utils::notify_event* evt; + error_code err; + uint32_t bytes; + }; + + disk_aio_ptr native_posix_aio_provider::prepare_aio_context(aio_task* tsk) + { + auto r = new posix_disk_aio_context; + bzero((char*)&r->cb, sizeof(r->cb)); + r->tsk = tsk; + r->evt = nullptr; + return disk_aio_ptr(r); + } + + void native_posix_aio_provider::aio(aio_task_ptr& aio_tsk) + { + aio_internal(aio_tsk, true); + } + + void aio_completed(sigval sigval) + { + auto ctx = (posix_disk_aio_context *)sigval.sival_ptr; + + int err = aio_error(&ctx->cb); + if (err != EINPROGRESS) + { + if (err != 0) + { + derror("file operation failed, errno = %d", errno); + } + + size_t bytes = aio_return(&ctx->cb); // from e.g., read or write + if (!ctx->evt) + { + aio_task_ptr aio(ctx->tsk); + ctx->this_->complete_io(aio, err == 0 ? ERR_SUCCESS : ERR_FILE_OPERATION_FAILED, bytes); + } + else + { + ctx->err = err == 0 ? 
ERR_SUCCESS : ERR_FILE_OPERATION_FAILED; + ctx->bytes = bytes; + ctx->evt->notify(); + } + } + } + + error_code native_posix_aio_provider::aio_internal(aio_task_ptr& aio_tsk, bool async, __out_param uint32_t* pbytes /*= nullptr*/) + { + auto aio = (posix_disk_aio_context *)aio_tsk->aio().get(); + int r; + + aio->this_ = this; + aio->cb.aio_fildes = static_cast((ssize_t)aio->file); + aio->cb.aio_buf = aio->buffer; + aio->cb.aio_nbytes = aio->buffer_size; + aio->cb.aio_offset = aio->file_offset; + + // set up callback + aio->cb.aio_sigevent.sigev_notify = SIGEV_THREAD; + aio->cb.aio_sigevent.sigev_notify_function = aio_completed; + aio->cb.aio_sigevent.sigev_notify_attributes = nullptr; + aio->cb.aio_sigevent.sigev_value.sival_ptr = aio; + + if (!async) + { + aio->evt = new utils::notify_event(); + aio->err = ERR_SUCCESS; + aio->bytes = 0; + } + + switch (aio->type) + { + case AIO_Read: + r = aio_read(&aio->cb); + break; + case AIO_Write: + r = aio_write(&aio->cb); + break; + default: + dassert (false, "unknown aio type %u", static_cast(aio->type)); + break; + } + + if (r < 0) + { + derror("file op faile, err = %d", errno); + } + + if (async) + { + return ERR_IO_PENDING; + } + else + { + aio->evt->wait(); + delete aio->evt; + aio->evt = nullptr; + *pbytes = aio->bytes; + return aio->err; + } + } + + } +} // end namespace dsn::tools +#endif diff --git a/src/tools/common/native_aio_provider.posix.h b/src/tools/common/native_aio_provider.posix.h new file mode 100644 index 0000000000..320e1fd06d --- /dev/null +++ b/src/tools/common/native_aio_provider.posix.h @@ -0,0 +1,59 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, 
modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#pragma once
+
+# include
+# include
+# if defined(__MACH__) || defined(__linux__)
+# include
+# include
+# endif
+// native_posix_aio_provider: aio_provider implemented with POSIX aio (aio_read / aio_write); defined in native_aio_provider.posix.cpp
+namespace dsn {
+    namespace tools {
+        class native_posix_aio_provider : public aio_provider
+        {
+        public:
+            native_posix_aio_provider(disk_engine* disk, aio_provider* inner_provider); // both arguments are passed on to aio_provider
+            ~native_posix_aio_provider();
+            // open()/close() wrap ::open and ::close; aio() submits the task asynchronously
+            virtual handle_t open(const char* file_name, int flag, int pmode);
+            virtual error_code close(handle_t hFile);
+            virtual void aio(aio_task_ptr& aio);
+            virtual disk_aio_ptr prepare_aio_context(aio_task* tsk); // allocates the per-task context holding the aiocb
+            // async == true returns ERR_IO_PENDING after submission; async == false waits for completion and stores the byte count through pbytes
+        protected:
+            error_code aio_internal(aio_task_ptr& aio, bool async, __out_param uint32_t* pbytes = nullptr);
+            // aio_completed is the SIGEV_THREAD callback installed by aio_internal; presumably a friend so that it can call complete_io()
+        private:
+# if defined(__MACH__) || defined(__linux__)
+            friend void aio_completed(sigval sigval);
+# endif
+        };
+    }
+}
+
+
diff --git a/src/tools/common/native_aio_provider.win.cpp b/src/tools/common/native_aio_provider.win.cpp
new file mode 100644
index 0000000000..6735bda5f1
--- /dev/null
+++ b/src/tools/common/native_aio_provider.win.cpp
@@ -0,0 +1,327 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015 Microsoft Corporation
+ *
+ * -=- Robust Distributed System Nucleus (rDSN) -=-
+ *
+
* Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#ifdef _WIN32 + +#include "native_aio_provider.win.h" +#include +#include +#include +#include +#include + +#define __TITLE__ "aio.provider.native" + +namespace dsn { namespace tools { + +native_win_aio_provider::native_win_aio_provider(disk_engine* disk, aio_provider* inner_provider) +: aio_provider(disk, inner_provider) +{ + _iocp = ::CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, NULL, 0); + _worker_thr = new std::thread(std::bind(&native_win_aio_provider::worker, this)); + ::SetThreadPriority(_worker_thr->native_handle(), THREAD_PRIORITY_HIGHEST); +} + +native_win_aio_provider::~native_win_aio_provider() +{ + if (_worker_thr != nullptr && _iocp != NULL && _iocp != INVALID_HANDLE_VALUE) + { + ::PostQueuedCompletionStatus(_iocp, 0, 1, NULL); + + _worker_thr->join(); + ::CloseHandle(_iocp); + _iocp = INVALID_HANDLE_VALUE; + delete _worker_thr; + _worker_thr = nullptr; + } +} + +handle_t native_win_aio_provider::open(const char* file_name, int oflag, int pmode) +{ + DWORD dwDesiredAccess = 0; + DWORD dwShareMode = FILE_SHARE_READ; + DWORD dwCreationDisposition = 0; + DWORD dwFlagsAndAttributes = FILE_FLAG_OVERLAPPED; + + SECURITY_ATTRIBUTES SecurityAttributes; + + SecurityAttributes.nLength = sizeof(SecurityAttributes); + SecurityAttributes.lpSecurityDescriptor = NULL; + + if (oflag & _O_NOINHERIT) { + SecurityAttributes.bInheritHandle = FALSE; + } + else { + SecurityAttributes.bInheritHandle = TRUE; + } + + /* + * decode the access flags + */ + switch (oflag & (_O_RDONLY | _O_WRONLY | _O_RDWR)) { + + case _O_RDONLY: /* read access */ + dwDesiredAccess = GENERIC_READ; + break; + case _O_WRONLY: /* write access */ + /* giving it read access as well + * because in append (a, not a+), we need + * to read the BOM to determine the encoding + * (ie. 
ANSI, UTF8, UTF16) + */ + if ((oflag & _O_APPEND) + && (oflag & (_O_WTEXT | _O_U16TEXT | _O_U8TEXT)) != 0) + { + dwDesiredAccess = GENERIC_READ | GENERIC_WRITE; + } + else + { + dwDesiredAccess = GENERIC_WRITE; + } + break; + case _O_RDWR: /* read and write access */ + dwDesiredAccess = GENERIC_READ | GENERIC_WRITE; + break; + default: /* error, bad oflag */ + _doserrno = 0L; /* not an OS error */ + derror("Invalid open flag\n"); + } + + /* + * decode open/create method flags + */ + switch (oflag & (_O_CREAT | _O_EXCL | _O_TRUNC)) { + case 0: + case _O_EXCL: // ignore EXCL w/o CREAT + dwCreationDisposition = OPEN_EXISTING; + break; + + case _O_CREAT: + dwCreationDisposition = OPEN_ALWAYS; + break; + + case _O_CREAT | _O_EXCL: + case _O_CREAT | _O_TRUNC | _O_EXCL: + dwCreationDisposition = CREATE_NEW; + break; + + case _O_TRUNC: + case _O_TRUNC | _O_EXCL: // ignore EXCL w/o CREAT + dwCreationDisposition = TRUNCATE_EXISTING; + break; + + case _O_CREAT | _O_TRUNC: + dwCreationDisposition = CREATE_ALWAYS; + break; + + default: + // this can't happen ... 
all cases are covered + _doserrno = 0L; + derror("Invalid open flag"); + } + + /* + * try to open/create the file + */ + HANDLE fileHandle = ::CreateFileA(file_name, + dwDesiredAccess, + dwShareMode, + &SecurityAttributes, + dwCreationDisposition, + dwFlagsAndAttributes, + 0); + + if (fileHandle != INVALID_HANDLE_VALUE && fileHandle != nullptr) + { + if (_iocp != ::CreateIoCompletionPort(fileHandle, _iocp, 0, 0)) + { + dassert(false, "cannot associate file handle %s to io completion port, err = %x\n", file_name, ::GetLastError()); + return nullptr; + } + else + { + return fileHandle; + } + } + else + { + dassert(false, "cannot create file %s, err = %x\n", file_name, ::GetLastError()); + return nullptr; + } +} + +error_code native_win_aio_provider::close(handle_t hFile) +{ + if (::CloseHandle(hFile)) + return ERR_SUCCESS; + else + { + derror("close file failed, err = %x\n", ::GetLastError()); + return ERR_FILE_OPERATION_FAILED; + } +} + +struct windows_disk_aio_context : public disk_aio +{ + OVERLAPPED olp; + aio_task* tsk; + utils::notify_event* evt; + error_code err; + uint32_t bytes; +}; + +disk_aio_ptr native_win_aio_provider::prepare_aio_context(aio_task* tsk) +{ + auto r = new windows_disk_aio_context; + ZeroMemory(&r->olp, sizeof(r->olp)); + r->tsk = tsk; + r->evt = nullptr; + return disk_aio_ptr(r); +} + +void native_win_aio_provider::aio(aio_task_ptr& aio_tsk) +{ + aio_internal(aio_tsk, true); +} + +error_code native_win_aio_provider::aio_internal(aio_task_ptr& aio_tsk, bool async, __out_param uint32_t* pbytes /*= nullptr*/) +{ + auto aio = (windows_disk_aio_context*)aio_tsk->aio().get(); + BOOL r = FALSE; + + aio->olp.Offset = (uint32_t)aio->file_offset; + aio->olp.OffsetHigh = (uint32_t)(aio->file_offset >> 32); + + if (!async) + { + aio->evt = new utils::notify_event(); + aio->err = ERR_SUCCESS; + aio->bytes = 0; + } + + switch (aio->type) + { + case AIO_Read: + r = ::ReadFile(aio->file, aio->buffer, aio->buffer_size, NULL, &aio->olp); + break; + case 
AIO_Write: + r = ::WriteFile(aio->file, aio->buffer, aio->buffer_size, NULL, &aio->olp); + break; + default: + dassert (false, "unknown aio type %u", static_cast(aio->type)); + break; + } + + if (!r) + { + int err = ::GetLastError(); + + if (err != ERROR_IO_PENDING) + { + derror("file operation failed, err = %u", err); + + if (async) + { + complete_io(aio_tsk, ERR_FILE_OPERATION_FAILED, 0); + } + else + { + delete aio->evt; + aio->evt = nullptr; + } + + return ERR_FILE_OPERATION_FAILED; + } + } + + if (async) + { + return ERR_IO_PENDING; + } + else + { + aio->evt->wait(); + delete aio->evt; + aio->evt = nullptr; + *pbytes = aio->bytes; + return aio->err; + } +} + +void native_win_aio_provider::worker() +{ + DWORD dwTransLen; + DWORD dwErrorCode; + ULONG_PTR dwKey; + LPOVERLAPPED overLap; + + do + { + bool ret = (0 != GetQueuedCompletionStatus(_iocp, &dwTransLen, &dwKey, &overLap, INFINITE)); + + if (dwKey) break; + + if (ret) + { + windows_disk_aio_context* ctx = CONTAINING_RECORD(overLap, windows_disk_aio_context, olp); + if (!ctx->evt) + { + aio_task_ptr aio(ctx->tsk); + complete_io(aio, ERR_SUCCESS, dwTransLen); + } + else + { + ctx->err = ERR_SUCCESS; + ctx->bytes = dwTransLen; + ctx->evt->notify(); + } + } + else if (overLap) + { + dwErrorCode = ::GetLastError(); + derror("file operation failed, err = %u", dwErrorCode); + + windows_disk_aio_context* ctx = CONTAINING_RECORD(overLap, windows_disk_aio_context, olp); + if (!ctx->evt) + { + aio_task_ptr aio(ctx->tsk); + complete_io(aio, ERR_FILE_OPERATION_FAILED, dwTransLen); + } + else + { + ctx->err = ERR_FILE_OPERATION_FAILED; + ctx->bytes = dwTransLen; + ctx->evt->notify(); + } + } + else + { + ::Sleep(1); + } + } while (true); +} + +}} // end namespace dsn::tools +#endif diff --git a/src/tools/common/native_aio_provider.win.h b/src/tools/common/native_aio_provider.win.h new file mode 100644 index 0000000000..f33e942b32 --- /dev/null +++ b/src/tools/common/native_aio_provider.win.h @@ -0,0 +1,55 @@ +/* + * The 
MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#pragma once + +# include +# include + +namespace dsn { + namespace tools { + class native_win_aio_provider : public aio_provider + { + public: + native_win_aio_provider(disk_engine* disk, aio_provider* inner_provider); + ~native_win_aio_provider(); + + virtual handle_t open(const char* file_name, int flag, int pmode); + virtual error_code close(handle_t hFile); + virtual void aio(aio_task_ptr& aio); + virtual disk_aio_ptr prepare_aio_context(aio_task* tsk); + + protected: + error_code aio_internal(aio_task_ptr& aio, bool async, __out_param uint32_t* pbytes = nullptr); + + private: + void worker(); + std::thread *_worker_thr; + handle_t _iocp; + }; + } +} + + diff --git a/src/tools/common/nativerun.cpp b/src/tools/common/nativerun.cpp new file mode 100644 index 0000000000..b0f3491837 --- /dev/null +++ b/src/tools/common/nativerun.cpp @@ -0,0 +1,83 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include + +namespace dsn { + namespace tools { + + void nativerun::install(service_spec& spec) + { + register_common_providers(); + + if (spec.aio_factory_name == "") + { + spec.aio_factory_name = ("dsn::tools::native_aio_provider"); + } + + if (spec.env_factory_name == "") + spec.env_factory_name = ("dsn::env_provider"); + + network_config_spec_default cs; + cs.factory_name = "dsn::tools::asio_network_provider"; + cs.message_buffer_block_size = 1024 * 64; + spec.network_default_configs[RPC_CHANNEL_TCP] = cs; + + if (spec.perf_counter_factory_name == "") + spec.perf_counter_factory_name = "dsn::tools::simple_perf_counter"; + + if (spec.logging_factory_name == "") + spec.logging_factory_name = "dsn::tools::simple_logger"; + + if (spec.lock_factory_name == "") + spec.lock_factory_name = ("dsn::tools::std_lock_provider"); + + if (spec.rwlock_factory_name == "") + spec.rwlock_factory_name = ("dsn::tools::std_rwlock_provider"); + + if (spec.semaphore_factory_name == "") + spec.semaphore_factory_name = ("dsn::tools::std_semaphore_provider"); + + for (auto it = spec.threadpool_specs.begin(); it != spec.threadpool_specs.end(); it++) + { + threadpool_spec& tspec = *it; + + if (tspec.worker_factory_name == "") + tspec.worker_factory_name = ("dsn::task_worker"); + + if (tspec.queue_factory_name == "") + tspec.queue_factory_name = ("dsn::tools::simple_task_queue"); + } + + } + + void nativerun::run() + { + tool_app::run(); + } + + } +} // end namespace dsn::tools diff --git a/src/tools/common/net_client_session.cpp b/src/tools/common/net_client_session.cpp new file mode 100644 index 0000000000..ccaf80ad05 --- /dev/null +++ b/src/tools/common/net_client_session.cpp @@ -0,0 +1,52 @@ +/* + * 
The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +# include "net_client_session.h" +# include + +# define __TITLE__ "net.session" + +namespace dsn { + namespace tools { + net_client_session::net_client_session( + asio_network_provider& net, + boost::asio::ip::tcp::socket& socket, + const end_point& remote_addr, + std::shared_ptr& matcher, + std::shared_ptr& parser) + : + _net(net), + rpc_client_session(net, remote_addr, matcher), + client_net_io(remote_addr, socket, parser) + { + } + + net_client_session::~net_client_session() + { + } + } +} + + diff --git a/src/tools/common/net_client_session.h b/src/tools/common/net_client_session.h new file mode 100644 index 0000000000..4a37c46e6b --- /dev/null +++ b/src/tools/common/net_client_session.h @@ -0,0 +1,63 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#pragma once + +# include "net_provider.h" +# include "net_io.h" + +namespace dsn { + namespace tools { + + class asio_network_provider; + class net_client_session + : public rpc_client_session, public client_net_io + { + public: + net_client_session( + asio_network_provider& net, + boost::asio::ip::tcp::socket& socket, + const end_point& remote_addr, + std::shared_ptr& matcher, + std::shared_ptr& parser + ); + ~net_client_session(); + + virtual void connect() { return client_net_io::connect(); } + virtual void send(message_ptr& msg) { return write(msg); } + virtual void on_closed() { return on_disconnected(); } + virtual void on_message_read(message_ptr& msg) + { + on_recv_reply(msg->header().id, msg, 0); + } + virtual void add_reference() { add_ref(); } + virtual void release_reference() { release_ref(); } + + private: + asio_network_provider &_net; + }; + } +} + diff --git a/src/tools/common/net_io.cpp b/src/tools/common/net_io.cpp new file mode 100644 index 0000000000..e5ac3094da --- /dev/null +++ b/src/tools/common/net_io.cpp @@ -0,0 +1,264 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# include "net_io.h" +# include +# include "shared_io_service.h" + +# define __TITLE__ "net.boost.asio" + +namespace dsn { + namespace tools { + + net_io::net_io( + const end_point& remote_addr, + boost::asio::ip::tcp::socket& socket, + std::shared_ptr& parser + ) + : + _io_service(shared_io_service::instance().ios), + _socket(std::move(socket)), + _sq("net_io.send.queue"), + _remote_addr(remote_addr), + _parser(parser) + { + set_options(); + } + + net_io::~net_io() + { + } + + void net_io::set_options() + { + if (_socket.is_open()) + { + try { + boost::asio::socket_base::send_buffer_size option, option2(16 * 1024 * 1024); + _socket.get_option(option); + int old = option.value(); + _socket.set_option(option2); + _socket.get_option(option); + + /*ddebug("boost asio send buffer size is %u, set as 16MB, now is %u", + old, option.value());*/ + + boost::asio::socket_base::receive_buffer_size option3, option4(16 * 1024 * 1024); + _socket.get_option(option3); + old = option3.value(); + _socket.set_option(option4); + _socket.get_option(option3); + /*ddebug("boost asio recv buffer size is %u, set as 16MB, now is %u", + old, option.value());*/ + } + catch (std::exception& ex) + { + dwarn("network session %s:%d set socket option failed, err = %s", + _remote_addr.to_ip_string().c_str(), + static_cast(_remote_addr.port), + ex.what() + ); + } + } + } + + void net_io::on_failure() + { + close(); + } + + void net_io::close() + { + try { + 
_socket.shutdown(boost::asio::socket_base::shutdown_type::shutdown_both); + } + catch (std::exception& ex) + { + ex; + /*dwarn("network session %s:%d exits failed, err = %s", + _remote_addr.to_ip_string().c_str(), + static_cast_remote_addr.port, + ex.what() + );*/ + } + + _socket.close(); + on_closed(); + } + + void net_io::do_read(size_t sz) + { + add_reference(); + + void* ptr = _parser->read_buffer_ptr((int)sz); + int remaining = _parser->read_buffer_capacity(); + + _socket.async_read_some(boost::asio::buffer(ptr, remaining), + [this](boost::system::error_code ec, std::size_t length) + { + if (!!ec) + { + on_failure(); + } + else + { + int read_next; + message_ptr msg = _parser->on_read((int)length, read_next); + + while (msg != nullptr) + { + this->on_message_read(msg); + msg = _parser->on_read(0, read_next); + } + + do_read(read_next); + } + + release_reference(); + }); + } + + void net_io::do_write() + { + auto msg = _sq.peek(); + if (nullptr == msg.get()) + return; + + std::vector buffers; + _parser->get_output_buffers(msg, buffers); + + std::vector buffers2; + for (auto& b : buffers) + { + buffers2.push_back(boost::asio::const_buffer(b.data(), b.length())); + } + + add_reference(); + boost::asio::async_write(_socket, buffers2, + [this, msg](boost::system::error_code ec, std::size_t length) + { + if (!!ec) + { + on_failure(); + } + else + { + auto smsg = _sq.dequeue_peeked(); + dassert(smsg == msg, "sent msg must be the first msg in send queue"); + //dinfo("network message sent, rpc_id = %016llx", msg->header().rpc_id); + + do_write(); + } + + release_reference(); + }); + } + + void net_io::write(message_ptr& msg) + { + _sq.enqueue(msg, task_spec::get(msg->header().local_rpc_code)->priority); + do_write(); + } + + // ------------------------------------------------------------ + + client_net_io::client_net_io(const end_point& remote_addr, + boost::asio::ip::tcp::socket& socket, + std::shared_ptr& parser) + : + net_io(remote_addr, socket, parser), + 
_state(SS_CLOSED), + _reconnect_count(0) + { + } + + void client_net_io::write(message_ptr& msg) + { + _sq.enqueue(msg, task_spec::get(msg->header().local_rpc_code)->priority); + + // not connected + if (SS_CONNECTED != _state) + { + return; + } + + do_write(); + } + + void client_net_io::on_failure() + { + _state = SS_CLOSED; + + if (_reconnect_count++ > 3) + { + close(); + return; + } + + connect(); + } + + void client_net_io::connect() + { + session_state closed_state = SS_CLOSED; + + if (_state.compare_exchange_strong(closed_state, SS_CONNECTING)) + { + boost::asio::ip::tcp::endpoint ep( + boost::asio::ip::address_v4(ntohl(_remote_addr.ip)), _remote_addr.port); + + add_reference(); + _socket.async_connect(ep, [this](boost::system::error_code ec) + { + if (!ec) + { + _reconnect_count = 0; + _state = SS_CONNECTED; + + dinfo("client session %s:%d connected", + _remote_addr.name.c_str(), + static_cast(_remote_addr.port) + ); + + set_options(); + + do_write(); + do_read(); + } + else + { + derror("network client session connect failed, error = %s", + ec.message().c_str() + ); + on_failure(); + } + release_reference(); + }); + } + } + + + } +} \ No newline at end of file diff --git a/src/tools/common/net_io.h b/src/tools/common/net_io.h new file mode 100644 index 0000000000..429ca0af6e --- /dev/null +++ b/src/tools/common/net_io.h @@ -0,0 +1,98 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice 
and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#pragma once + +# include +# include +# include +# include + +namespace dsn { + namespace tools { + + class net_io + { + public: + net_io(const end_point& remote_addr, + boost::asio::ip::tcp::socket& socket, + std::shared_ptr& parser); + virtual ~net_io(); + + virtual void write(message_ptr& msg); + void close(); + void start_read(size_t sz = 256) { do_read(sz); } + + protected: + void do_read(size_t sz = 256); + void do_write(); + void set_options(); + + virtual void on_failure() = 0; + virtual void on_closed() = 0; + virtual void on_message_read(message_ptr& msg) = 0; + virtual void add_reference() = 0; + virtual void release_reference() = 0; + + protected: + + boost::asio::io_service &_io_service; + boost::asio::ip::tcp::socket _socket; + message_header _read_msg_hdr; + blob _read_buffer; + end_point _remote_addr; + std::shared_ptr _parser; + + // TODO: expose the queue to be customizable + typedef utils::priority_queue send_queue; + send_queue _sq; + }; + + class client_net_io : public net_io + { + public: + client_net_io(const end_point& remote_addr, + boost::asio::ip::tcp::socket& socket, + std::shared_ptr& parser); + + void connect(); + virtual void write(message_ptr& msg); + + private: + virtual void on_failure(); + + private: + enum session_state + { + SS_CONNECTING, + SS_CONNECTED, + SS_CLOSED + }; + + std::atomic _state; + int _reconnect_count; + }; + } +} diff 
--git a/src/tools/common/net_provider.cpp b/src/tools/common/net_provider.cpp new file mode 100644 index 0000000000..a57b144b62 --- /dev/null +++ b/src/tools/common/net_provider.cpp @@ -0,0 +1,100 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include "shared_io_service.h" +#include "net_provider.h" +#include "net_client_session.h" +#include "net_server_session.h" + +namespace dsn { + namespace tools{ + + asio_network_provider::asio_network_provider(rpc_engine* srv, network* inner_provider) + : network(srv, inner_provider), _io_service(shared_io_service::instance().ios) + { + _acceptor = nullptr; + _socket.reset(new boost::asio::ip::tcp::socket(_io_service)); + } + + error_code asio_network_provider::start(rpc_channel channel, int port, bool client_only) + { + if (_acceptor != nullptr) + return ERR_SERVICE_ALREADY_RUNNING; + + _address = end_point(boost::asio::ip::host_name().c_str(), port); + + if (!client_only) + { + auto v4_addr = boost::asio::ip::address_v4::any(); //(ntohl(_address.ip)); + ::boost::asio::ip::tcp::endpoint ep(v4_addr, _address.port); + + try + { + _acceptor.reset(new boost::asio::ip::tcp::acceptor(_io_service, ep, true)); + do_accept(); + } + catch (boost::system::system_error& err) + { + printf("boost asio listen on port %u failed, err: %s\n", port, err.what()); + return ERR_ADDRESS_ALREADY_USED; + } + } + + return ERR_SUCCESS; + } + + rpc_client_session_ptr asio_network_provider::create_client_session(const end_point& server_addr) + { + auto matcher = new_client_matcher(); + auto parser = new_message_parser(); + auto sock = boost::asio::ip::tcp::socket(_io_service); + return rpc_client_session_ptr(new net_client_session(*this, sock, server_addr, matcher, parser)); + } + + void asio_network_provider::do_accept() + { + _acceptor->async_accept(*_socket, + [this](boost::system::error_code ec) + { + if (!ec) + { + end_point client_addr; + client_addr.ip = htonl(_socket->remote_endpoint().address().to_v4().to_ulong()); + client_addr.port = _socket->remote_endpoint().port(); + + // TODO: convert ip to host name + client_addr.name = _socket->remote_endpoint().address().to_string(); + + auto parser = new_message_parser(); + auto sock = std::move(*_socket); + auto s = 
rpc_server_session_ptr(new net_server_session(*this, client_addr, sock, parser)); + this->on_server_session_accepted(s); + } + + do_accept(); + }); + } + } +} diff --git a/src/tools/common/net_provider.h b/src/tools/common/net_provider.h new file mode 100644 index 0000000000..1419c12ecf --- /dev/null +++ b/src/tools/common/net_provider.h @@ -0,0 +1,57 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#pragma once + +# include +# include + +namespace dsn { + namespace tools { + + class asio_network_provider : public network + { + public: + asio_network_provider(rpc_engine* srv, network* inner_provider); + + virtual error_code start(rpc_channel channel, int port, bool client_only); + virtual const end_point& address() { return _address; } + virtual rpc_client_session_ptr create_client_session(const end_point& server_addr); + + private: + void do_accept(); + + private: + friend class net_server_session; + friend class net_client_session; + + std::shared_ptr _acceptor; + std::shared_ptr _socket; + boost::asio::io_service &_io_service; + end_point _address; + }; + + } +} diff --git a/src/tools/common/net_server_session.cpp b/src/tools/common/net_server_session.cpp new file mode 100644 index 0000000000..2baf38000c --- /dev/null +++ b/src/tools/common/net_server_session.cpp @@ -0,0 +1,56 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +# include "net_server_session.h" +# include "shared_io_service.h" +# include +# include + +# define __TITLE__ "net.session" + +namespace dsn { + namespace tools { + net_server_session::net_server_session( + asio_network_provider& net, + const end_point& remote_addr, + boost::asio::ip::tcp::socket& socket, + std::shared_ptr& parser + ) + : _net(net), + rpc_server_session(net, remote_addr), + net_io(remote_addr, socket, parser) + { + start_read(); + } + + net_server_session::~net_server_session() + { + } + + } +} + + diff --git a/src/tools/common/net_server_session.h b/src/tools/common/net_server_session.h new file mode 100644 index 0000000000..7344292cd6 --- /dev/null +++ b/src/tools/common/net_server_session.h @@ -0,0 +1,61 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#pragma once + +# include "net_provider.h" +# include "net_io.h" + +namespace dsn { + namespace tools { + + class asio_network_provider; + class net_server_session + : public rpc_server_session, public net_io + { + public: + net_server_session( + asio_network_provider& net, + const end_point& remote_addr, + boost::asio::ip::tcp::socket& socket, + std::shared_ptr& parser); + ~net_server_session(); + + virtual void send(message_ptr& reply_msg) { return write(reply_msg); } + virtual void on_failure() { close(); } + virtual void on_closed() { return on_disconnected(); } + virtual void on_message_read(message_ptr& msg) + { + return on_recv_request(msg, 0); + } + virtual void add_reference() { add_ref(); } + virtual void release_reference() { release_ref(); } + + private: + asio_network_provider &_net; + }; + } +} + diff --git a/src/tools/common/network.sim.cpp b/src/tools/common/network.sim.cpp new file mode 100644 index 0000000000..064dc235d1 --- /dev/null +++ b/src/tools/common/network.sim.cpp @@ -0,0 +1,130 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies 
or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include +#include +#include +#include "network.sim.h" + +#define __TITLE__ "net.provider.sim" + +namespace dsn { namespace tools { + + // multiple machines connect to the same switch, 10 should be >= than rpc_channel::max_value() + 1 + static utils::singleton_store s_switch[10]; + + sim_client_session::sim_client_session(sim_network_provider& net, const end_point& remote_addr, std::shared_ptr& matcher) + : rpc_client_session(net, remote_addr, matcher) + {} + + void sim_client_session::connect() + { + // nothing to do + } + + void sim_client_session::send(message_ptr& msg) + { + sim_network_provider* rnet = nullptr; + if (!s_switch[task_spec::get(msg->header().local_rpc_code)->rpc_call_channel].get(msg->header().to_address, rnet)) + { + dwarn("cannot find destination node %s:%d in simulator", + msg->header().to_address.name.c_str(), + static_cast(msg->header().to_address.port) + ); + return; + } + + auto server_session = rnet->get_server_session(_net.address()); + if (nullptr == server_session) + { + rpc_client_session_ptr cptr = this; + server_session.reset(new sim_server_session(*rnet, _net.address(), cptr)); + rnet->on_server_session_accepted(server_session); + } + + message_ptr recv_msg(new message(msg->writer().get_buffer())); + recv_msg->header().from_address = msg->header().from_address; + recv_msg->header().to_address = msg->header().to_address; + + server_session->on_recv_request(recv_msg, + recv_msg->header().from_address == 
recv_msg->header().to_address ? + 0 : rnet->net_delay_milliseconds() + ); + } + + sim_server_session::sim_server_session(sim_network_provider& net, const end_point& remote_addr, rpc_client_session_ptr& client) + : rpc_server_session(net, remote_addr) + { + _client = client; + } + + void sim_server_session::send(message_ptr& reply_msg) + { + message_ptr recv_msg(new message(reply_msg->writer().get_buffer())); + recv_msg->header().from_address = reply_msg->header().from_address; + recv_msg->header().to_address = reply_msg->header().to_address; + + _client->on_recv_reply(recv_msg->header().id, recv_msg, + recv_msg->header().from_address == recv_msg->header().to_address ? + 0 : (static_cast(&_net))->net_delay_milliseconds() + ); + } + + sim_network_provider::sim_network_provider(rpc_engine* rpc, network* inner_provider) + : network(rpc, inner_provider), _primary_address("localhost", 1) + { + _min_message_delay_microseconds = 1; + _max_message_delay_microseconds = 100000; + + auto config = tool_app::get_service_spec().config; + if (config != NULL) + { + _min_message_delay_microseconds = config->get_value("dsn.simulation", "min_message_delay_microseconds", _min_message_delay_microseconds); + _max_message_delay_microseconds = config->get_value("dsn.simulation", "max_message_delay_microseconds", _max_message_delay_microseconds); + } + } + + error_code sim_network_provider::start(rpc_channel channel, int port, bool client_only) + { + _primary_address.port = port; + + if (!client_only) + { + if (s_switch[channel].put(_primary_address, this)) + return ERR_SUCCESS; + else + return ERR_ADDRESS_ALREADY_USED; + } + else + { + return ERR_SUCCESS; + } + } + + uint32_t sim_network_provider::net_delay_milliseconds() const + { + return static_cast(dsn::service::env::random32(_min_message_delay_microseconds, _max_message_delay_microseconds)) / 1000; + } +}} // end namespace diff --git a/src/tools/common/network.sim.h b/src/tools/common/network.sim.h new file mode 100644 index 
0000000000..11d4acf0f9 --- /dev/null +++ b/src/tools/common/network.sim.h @@ -0,0 +1,81 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#pragma once + +#include + +namespace dsn { namespace tools { + + class sim_network_provider; + class sim_client_session : public rpc_client_session + { + public: + sim_client_session(sim_network_provider& net, const end_point& remote_addr, std::shared_ptr& matcher); + + virtual void connect(); + virtual void send(message_ptr& msg); + }; + + class sim_server_session : public rpc_server_session + { + public: + sim_server_session(sim_network_provider& net, const end_point& remote_addr, rpc_client_session_ptr& client); + + virtual void send(message_ptr& reply_msg); + + private: + rpc_client_session_ptr _client; + }; + + class sim_network_provider : public network + { + public: + sim_network_provider(rpc_engine* rpc, network* inner_provider); + ~sim_network_provider(void) {} + + virtual error_code start(rpc_channel channel, int port, bool client_only); + + virtual const end_point& address() { return _primary_address; } + + virtual rpc_client_session_ptr create_client_session(const end_point& server_addr) + { + auto matcher = new_client_matcher(); + return rpc_client_session_ptr(new sim_client_session(*this, server_addr, matcher)); + } + + uint32_t net_delay_milliseconds() const; + + private: + end_point _primary_address; + uint32_t _min_message_delay_microseconds; + uint32_t _max_message_delay_microseconds; + }; + + //------------- inline implementations ------------- + + +}} // end namespace + diff --git a/src/tools/common/profiler.cpp b/src/tools/common/profiler.cpp new file mode 100644 index 0000000000..c4d47dae11 --- /dev/null +++ b/src/tools/common/profiler.cpp @@ -0,0 +1,255 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + 
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include + +#define __TITLE__ "toollet.profiler" + +namespace dsn { + namespace tools { + + struct task_spec_profiler + { + perf_counter_ptr task_queueing_time_ns; + perf_counter_ptr task_exec_time_ns; + perf_counter_ptr task_throughput; + perf_counter_ptr task_cancelled; + + perf_counter_ptr aio_latency_ns; // for AIO only, from aio call to aio callback task enqueued + perf_counter_ptr rpc_server_latency_ns; // for RPC_RESQUEST ONLY, from rpc request enqueue to rpc response + perf_counter_ptr rpc_client_non_timeout_latency_ns; // for RPC_RESPONSE ONLY, from rpc call to rpc response enqueued + perf_counter_ptr rpc_client_timeout_throughput; + + // matrix to collect how many to call other tasks + bool collect_call_count; + std::atomic* call_counts; + }; + + static task_spec_profiler* s_spec_profilers = nullptr; + + typedef uint64_extension_helper task_ext_for_profiler; + typedef uint64_extension_helper message_ext_for_profiler; + + // call normal task + static void profiler_on_task_enqueue(task* caller, task* callee) + { + if (caller != nullptr) + { + auto& prof = s_spec_profilers[caller->spec().code]; + if 
(prof.collect_call_count) + { + prof.call_counts[callee->spec().code]++; + } + } + + task_ext_for_profiler::get(callee) = ::dsn::service::env::now_ns(); + } + + static void profiler_on_task_begin(task* this_) + { + uint64_t& qts = task_ext_for_profiler::get(this_); + uint64_t now = ::dsn::service::env::now_ns(); + s_spec_profilers[this_->spec().code].task_queueing_time_ns->set(now - qts); + qts = now; + } + + static void profiler_on_task_end(task* this_) + { + uint64_t qts = task_ext_for_profiler::get(this_); + uint64_t now = ::dsn::service::env::now_ns(); + s_spec_profilers[this_->spec().code].task_exec_time_ns->set(now - qts); + s_spec_profilers[this_->spec().code].task_throughput->increment(); + } + + static void profiler_on_task_cancelled(task* this_) + { + s_spec_profilers[this_->spec().code].task_cancelled->increment(); + } + + static void profiler_on_task_wait_pre(task* caller, task* callee, uint32_t timeout_ms) + { + + } + + static void profiler_on_task_wait_post(task* caller, task* callee, bool succ) + { + + } + + static void profiler_on_task_cancel_post(task* caller, task* callee, bool succ) + { + + } + + // aio-call hook: bumps the caller's call-count row (when enabled) and stamps the aio start time on callee + static void profiler_on_aio_call(task* caller, aio_task* callee) + { + auto* prof = (caller != nullptr) ? &s_spec_profilers[caller->spec().code] : nullptr; // same null-caller guard as profiler_on_task_enqueue + if (prof != nullptr && prof->collect_call_count) + { + prof->call_counts[callee->spec().code]++; + } + + // time disk io starts + task_ext_for_profiler::get(callee) = ::dsn::service::env::now_ns(); + } + + static void profiler_on_aio_enqueue(aio_task* this_) + { + uint64_t& ats = task_ext_for_profiler::get(this_); + uint64_t now = ::dsn::service::env::now_ns(); + + s_spec_profilers[this_->spec().code].aio_latency_ns->set(now - ats); + ats = now; + } + + // rpc-call hook: bumps the caller's call-count row (when enabled) and stamps the rpc start time on the response task, if any + static void profiler_on_rpc_call(task* caller, message* req, rpc_response_task* callee) + { + auto* prof = 
(caller != nullptr) ? &s_spec_profilers[caller->spec().code] : nullptr; // same null-caller guard as profiler_on_task_enqueue + if (prof != nullptr && prof->collect_call_count) + { + prof->call_counts[req->header().local_rpc_code]++; + } + + // time rpc starts + if (nullptr != callee) + { + task_ext_for_profiler::get(callee) = ::dsn::service::env::now_ns(); + } + } + + static void profiler_on_rpc_request_enqueue(rpc_request_task* callee) + { + uint64_t now = ::dsn::service::env::now_ns(); + task_ext_for_profiler::get(callee) = now; + message_ext_for_profiler::get(callee->get_request().get()) = now; + } + + static void profiler_on_create_response(message* req, message* resp) + { + message_ext_for_profiler::get(resp) = message_ext_for_profiler::get(req); + } + + // rpc-reply hook: bumps the caller's call-count row (when enabled) and records server latency since the request was enqueued + static void profiler_on_rpc_reply(task* caller, message* msg) + { + auto* prof = (caller != nullptr) ? &s_spec_profilers[caller->spec().code] : nullptr; // same null-caller guard as profiler_on_task_enqueue + if (prof != nullptr && prof->collect_call_count) + { + prof->call_counts[msg->header().local_rpc_code]++; + } + + uint64_t qts = message_ext_for_profiler::get(msg); + uint64_t now = ::dsn::service::env::now_ns(); + + auto code = task_spec::get(msg->header().local_rpc_code)->rpc_paired_code; + s_spec_profilers[code].rpc_server_latency_ns->set(now - qts); + } + + static void profiler_on_rpc_response_enqueue(rpc_response_task* resp) + { + uint64_t& cts = task_ext_for_profiler::get(resp); + uint64_t now = ::dsn::service::env::now_ns(); + if (resp->get_response() != nullptr) + { + s_spec_profilers[resp->spec().code].rpc_client_non_timeout_latency_ns->set(now - cts); + } + else + { + s_spec_profilers[resp->spec().code].rpc_client_timeout_throughput->increment(); + } + cts = now; + } + + void profiler::install(service_spec& spec) + { + s_spec_profilers = new task_spec_profiler[task_code::max_value()+1]; + task_ext_for_profiler::register_ext(); + message_ext_for_profiler::register_ext(); + + auto profile = config()->get_value("task.default", "is_profile", false); + auto collect_call_count = config()->get_value("task.default", 
"collect_call_count", false); + + for (int i = 0; i <= task_code::max_value(); i++) + { + if (i == TASK_CODE_INVALID) + continue; + + std::string name = std::string("task.") + std::string(task_code::to_string(i)); + task_spec* spec = task_spec::get(i); + dassert (spec != nullptr, "task_spec cannot be null"); + + s_spec_profilers[i].collect_call_count = config()->get_value(name.c_str(), "collect_call_count", collect_call_count); + s_spec_profilers[i].call_counts = new std::atomic[task_code::max_value() + 1]; + + s_spec_profilers[i].task_queueing_time_ns = dsn::utils::perf_counters::instance().get_counter((name + std::string(".queue(ns)")).c_str(), COUNTER_TYPE_NUMBER_PERCENTILES, true); + s_spec_profilers[i].task_exec_time_ns = dsn::utils::perf_counters::instance().get_counter((name + std::string(".exec(ns)")).c_str(), COUNTER_TYPE_NUMBER_PERCENTILES, true); + s_spec_profilers[i].task_throughput = dsn::utils::perf_counters::instance().get_counter((name + std::string(".qps")).c_str(), COUNTER_TYPE_RATE, true); + s_spec_profilers[i].task_cancelled = dsn::utils::perf_counters::instance().get_counter((name + std::string(".cancelled#")).c_str(), COUNTER_TYPE_NUMBER, true); + + if (spec->type == task_type::TASK_TYPE_RPC_REQUEST) + { + s_spec_profilers[i].rpc_server_latency_ns = dsn::utils::perf_counters::instance().get_counter((name + std::string(".latency.server")).c_str(), COUNTER_TYPE_NUMBER_PERCENTILES, true); + } + else if (spec->type == task_type::TASK_TYPE_RPC_RESPONSE) + { + s_spec_profilers[i].rpc_client_non_timeout_latency_ns = dsn::utils::perf_counters::instance().get_counter((name + std::string(".latency.client(ns)")).c_str(), COUNTER_TYPE_NUMBER_PERCENTILES, true); + s_spec_profilers[i].rpc_client_timeout_throughput = dsn::utils::perf_counters::instance().get_counter((name + std::string(".timeout.qps")).c_str(), COUNTER_TYPE_RATE, true); + } + else if (spec->type == task_type::TASK_TYPE_AIO) + { + s_spec_profilers[i].aio_latency_ns = 
dsn::utils::perf_counters::instance().get_counter((name + std::string(".latency(ns)")).c_str(), COUNTER_TYPE_NUMBER_PERCENTILES, true); + } + + if (!config()->get_value(name.c_str(), "is_profile", profile)) + continue; + + spec->on_task_enqueue.put_back(profiler_on_task_enqueue, "profiler"); + spec->on_task_begin.put_back(profiler_on_task_begin, "profiler"); + spec->on_task_end.put_back(profiler_on_task_end, "profiler"); + spec->on_task_cancelled.put_back(profiler_on_task_cancelled, "profiler"); + //spec->on_task_wait_pre.put_back(profiler_on_task_wait_pre, "profiler"); + //spec->on_task_wait_post.put_back(profiler_on_task_wait_post, "profiler"); + //spec->on_task_cancel_post.put_back(profiler_on_task_cancel_post, "profiler"); + spec->on_aio_call.put_back(profiler_on_aio_call, "profiler"); + spec->on_aio_enqueue.put_back(profiler_on_aio_enqueue, "profiler"); + spec->on_rpc_call.put_back(profiler_on_rpc_call, "profiler"); + spec->on_rpc_request_enqueue.put_back(profiler_on_rpc_request_enqueue, "profiler"); + spec->on_create_response.put_back(profiler_on_create_response, "profiler"); + spec->on_rpc_reply.put_back(profiler_on_rpc_reply, "profiler"); + spec->on_rpc_response_enqueue.put_back(profiler_on_rpc_response_enqueue, "profiler"); + } + + // TODO: profiling on overall rpc/network/disk io. 
+ } + + profiler::profiler(const char* name) + : toollet(name) + { + } + } +} diff --git a/src/tools/common/providers.common.cpp b/src/tools/common/providers.common.cpp new file mode 100644 index 0000000000..ce058efe9c --- /dev/null +++ b/src/tools/common/providers.common.cpp @@ -0,0 +1,60 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +# include "net_provider.h" +# include +# include "lockp.std.h" +# include "native_aio_provider.win.h" +# include "native_aio_provider.posix.h" +# include "simple_perf_counter.h" +# include "simple_task_queue.h" +# include "network.sim.h" +# include "simple_logger.h" + +namespace dsn { + namespace tools { + void register_common_providers() + { + register_component_provider("dsn::env_provider"); + register_component_provider("dsn::task_worker"); + register_component_provider("dsn::tools::screen_logger"); + register_component_provider("dsn::tools::simple_logger"); + register_component_provider("dsn::tools::std_lock_provider"); + register_component_provider("dsn::tools::std_rwlock_provider"); + register_component_provider("dsn::tools::std_semaphore_provider"); + register_component_provider("dsn::tools::simple_perf_counter"); + register_component_provider("dsn::tools::asio_network_provider"); + register_component_provider("dsn::tools::sim_network_provider"); + register_component_provider("dsn::tools::simple_task_queue"); + register_message_header_parser(NET_HDR_DSN); +#if defined(_WIN32) + register_component_provider("dsn::tools::native_aio_provider"); +#else + register_component_provider("dsn::tools::native_aio_provider"); +#endif + } + } +} diff --git a/src/tools/common/shared_io_service.h b/src/tools/common/shared_io_service.h new file mode 100644 index 0000000000..317f421b05 --- /dev/null +++ b/src/tools/common/shared_io_service.h @@ -0,0 +1,62 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom 
the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#pragma once + +# include +# include +# include +# include +# include +# include + +namespace dsn { + namespace tools { + + class shared_io_service : public utils::singleton + { + public: + shared_io_service() + { + _io_service_worker_count = config()->get_value("network", "io_service_worker_count", 1); + for (int i = 0; i < _io_service_worker_count; i++) + { + _workers.push_back(std::shared_ptr(new std::thread([this]() + { + boost::asio::io_service::work work(ios); + ios.run(); + }))); + } + } + + boost::asio::io_service ios; + + private: + int _io_service_worker_count; + std::vector> _workers; + }; + + } +} diff --git a/src/tools/common/simple_logger.cpp b/src/tools/common/simple_logger.cpp new file mode 100644 index 0000000000..137b92f971 --- /dev/null +++ b/src/tools/common/simple_logger.cpp @@ -0,0 +1,157 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or 
sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# include "simple_logger.h" +# include +# include + +namespace dsn { + namespace tools { + + static void print_header(FILE* fp) + { + uint64_t ts = 0; + if (::dsn::service::system::is_ready()) + ts = ::dsn::service::env::now_ms(); + + char str[24]; + ::dsn::utils::time_ms_to_string(ts, str); + + fprintf(fp, "%s(%llu) ", str, ts); + + task* t = task::get_current_task(); + if (t) + { + if (nullptr != task::get_current_worker()) + { + fprintf(fp, "%6s.%7s%u.%016llx: ", + t->node_name(), + task::get_current_worker()->pool_spec().name.c_str(), + task::get_current_worker()->index(), + static_cast(t->id()) + ); + } + else + { + std::string tid = boost::lexical_cast(boost::this_thread::get_id()); + + fprintf(fp, "%6s.%7s.%s.%016llx: ", + t->node_name(), + "io-thrd", + tid.c_str(), + static_cast(t->id()) + ); + } + } + else + { + std::string tid = boost::lexical_cast(boost::this_thread::get_id()); + fprintf(fp, "%6s.%7s.%s: ", + "system", + "io-thrd", + tid.c_str() + ); + } + } + + void screen_logger::logv(const char *file, + const char *function, + const int line, + logging_level logLevel, + const char* title, + const char *fmt, + va_list args + ) + { + utils::auto_lock l(_lock); + + print_header(stdout); + 
vprintf(fmt, args); + printf("\n"); + } + + simple_logger::simple_logger(const char *parameter) + : logging_provider(parameter) + { + _index = 0; + _lines = 0; + _log = nullptr; + + create_log_file(); + } + + void simple_logger::create_log_file() + { + if (_log != nullptr) + fclose(_log); + + _lines = 0; + std::stringstream str; + str << "log." << ++_index << ".txt"; + _log = fopen(str.str().c_str(), "w+"); + } + + simple_logger::~simple_logger(void) + { + if (_log != nullptr) fclose(_log); // fopen() in create_log_file may have failed; fclose(NULL) is undefined + } + + void simple_logger::logv(const char *file, + const char *function, + const int line, + logging_level logLevel, + const char* title, + const char *fmt, + va_list args + ) + { + va_list args2; + // vfprintf() below consumes args, so the stdout echo needs its own copy; it is + // taken unconditionally so that it can be released unconditionally with va_end + va_copy(args2, args); + + utils::auto_lock l(_lock); + + print_header(_log); + fprintf(_log, "%s, ", title); + vfprintf(_log, fmt, args); + fprintf(_log, "\n"); + if (logLevel >= log_level_ERROR) + fflush(_log); + + if (logLevel >= log_level_WARNING) + { + print_header(stdout); + printf("%s, ", title); + vprintf(fmt, args2); + printf("\n"); + } + va_end(args2); + + if (++_lines >= 200000) + create_log_file(); + } + } +} diff --git a/src/tools/common/simple_logger.h b/src/tools/common/simple_logger.h new file mode 100644 index 0000000000..17f1838d05 --- /dev/null +++ b/src/tools/common/simple_logger.h @@ -0,0 +1,82 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be 
included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# pragma once + +# include +# include +# include + +namespace dsn { + namespace tools { + + + class screen_logger : public logging_provider + { + public: + screen_logger(const char *parameter) : logging_provider(parameter) { } + virtual ~screen_logger(void) { } + + virtual void logv(const char *file, + const char *function, + const int line, + logging_level logLevel, + const char* title, + const char *fmt, + va_list args + ); + + private: + std::recursive_mutex _lock; + }; + + + class simple_logger : public logging_provider + { + public: + simple_logger(const char *parameter); + virtual ~simple_logger(void); + + virtual void logv(const char *file, + const char *function, + const int line, + logging_level logLevel, + const char* title, + const char *fmt, + va_list args + ); + + private: + void create_log_file(); + + private: + std::recursive_mutex _lock; + FILE* _log; + int _index; + int _lines; + }; + + } +} diff --git a/src/tools/common/simple_perf_counter.cpp b/src/tools/common/simple_perf_counter.cpp new file mode 100644 index 0000000000..e806eb9c51 --- /dev/null +++ b/src/tools/common/simple_perf_counter.cpp @@ -0,0 +1,291 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the 
"Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# include "simple_perf_counter.h" +# include "shared_io_service.h" + +namespace dsn { + namespace tools { + + // ----------- NUMBER perf counter --------------------------------- + + class perf_counter_number : public perf_counter + { + public: + perf_counter_number(const char *section, const char *name, perf_counter_type type) + : perf_counter(section, name, type), _val(0){} + ~perf_counter_number(void) {} + + virtual void increment() { _val++; } + virtual void decrement() { _val--; } + virtual void add(uint64_t val) { _val += val; } + virtual void set(uint64_t val) { dassert(false, "invalid execution flow"); } + virtual double get_value() { return static_cast(_val.load()); } + virtual double get_percentile(counter_percentile_type type) { dassert(false, "invalid execution flow"); return 0.0; } + + private: + std::atomic _val; + }; + + // ----------- RATE perf counter --------------------------------- + + class perf_counter_rate : public perf_counter + { + public: + perf_counter_rate(const char *section, const char *name, 
perf_counter_type type) + : perf_counter(section, name, type), _val(0) + { + qts = 0; + } + ~perf_counter_rate(void) {} + + virtual void increment() { _val++; } + virtual void decrement() { _val--; } + virtual void add(uint64_t val) { _val += val; } + virtual void set(uint64_t val) { dassert(false, "invalid execution flow"); } + virtual double get_value() // events per second since the previous call; resets the window + { + uint64_t now = ::dsn::service::env::now_ns(); + uint64_t interval = now - qts; + if (interval == 0) return 0.0; // no time elapsed since the last read: keep the pending count for the next call instead of dividing by zero + qts = now; + double val = static_cast<double>(_val.exchange(0)); // read-and-reset in one atomic step so concurrent increments are not lost + return val / interval * 1000 * 1000 * 1000; + } + virtual double get_percentile(counter_percentile_type type) { dassert(false, "invalid execution flow"); return 0.0; } + + private: + std::atomic _val; + std::atomic qts; + }; + + // ----------- NUMBER_PERCENTILE perf counter --------------------------------- + + # define MAX_QUEUE_LENGTH 50000 + # define _LEFT 0 + # define _RIGHT 1 + # define _QLEFT 2 + # define _QRIGHT 3 + + class perf_counter_number_percentile : public perf_counter + { + public: + perf_counter_number_percentile(const char *section, const char *name, perf_counter_type type) + : perf_counter(section, name, type), _tail(0) + { + _counter_computation_interval_seconds = config()->get_value("components.simple_perf_counter", "counter_computation_interval_seconds", 30); + + _timer.reset(new boost::asio::deadline_timer(shared_io_service::instance().ios)); + _timer->expires_from_now(boost::posix_time::seconds(rand() % _counter_computation_interval_seconds + 1)); + _timer->async_wait(std::bind(&perf_counter_number_percentile::on_timer, this, std::placeholders::_1)); + } + + ~perf_counter_number_percentile(void) + { + _timer->cancel(); + } + + virtual void increment() { dassert(false, "invalid execution flow"); } + virtual void decrement() { dassert(false, "invalid execution flow"); } + virtual void add(uint64_t val) { dassert(false, "invalid execution flow"); } + virtual void set(uint64_t val) + { + auto idx = _tail++; + _samples[idx % MAX_QUEUE_LENGTH] = 
val; + } + + virtual double get_value() { dassert(false, "invalid execution flow"); return 0.0; } + + virtual double get_percentile(counter_percentile_type type) + { + if (_tail == 0) + return -1.0; + if ((type < 0) || (type >= COUNTER_PERCENTILE_COUNT)) + { + dassert(false, "send a wrong counter percentile type"); + return -1; + } + return (double)_results[type]; + } + + private: + struct compute_context + { + uint64_t ask[COUNTER_PERCENTILE_COUNT]; + uint64_t tmp[MAX_QUEUE_LENGTH]; + uint64_t mid_tmp[MAX_QUEUE_LENGTH]; + int calc_queue[MAX_QUEUE_LENGTH * 10][4]; + }; + + private: + inline void insert_calc_queue(boost::shared_ptr& ctx, int left, int right, int qleft, int qright, int &calc_tail) + { + calc_tail++; + ctx->calc_queue[calc_tail][_LEFT] = left; + ctx->calc_queue[calc_tail][_RIGHT] = right; + ctx->calc_queue[calc_tail][_QLEFT] = qleft; + ctx->calc_queue[calc_tail][_QRIGHT] = qright; + return; + } + + uint64_t find_mid(boost::shared_ptr& ctx, int left, int right) + { + if (left == right) + return ctx->mid_tmp[left]; + + int index; + for (index = left; index < right; index += 5) + { + int remain_num = index + 5 >= right ? 
right - index + 1 : 5; + for (int i = index; i < index + remain_num; i++) + { + int j; + uint64_t k = ctx->mid_tmp[i]; + for (j = i - 1; (j >= index) && (ctx->mid_tmp[j] > k); j--) + ctx->mid_tmp[j + 1] = ctx->mid_tmp[j]; + ctx->mid_tmp[j + 1] = k; + } + ctx->mid_tmp[(index - left) / 5] = ctx->mid_tmp[index + remain_num / 2]; + } + + return find_mid(ctx, 0, (right - left - 1) / 5); + } + + inline void select(boost::shared_ptr& ctx, int left, int right, int qleft, int qright, int &calc_tail) + { + int i, j, index, now; + uint64_t mid; + + if (qleft > qright) + return; + + if (left == right) + { + for (i = qleft; i <= qright; i++) + if (ctx->ask[i] == 1) + _results[i] = ctx->tmp[left]; + else + dassert(false, "select percentail wrong!!!"); + return; + } + + for (i = left; i <= right; i++) + ctx->mid_tmp[i] = ctx->tmp[i]; + mid = find_mid(ctx, left, right); + + for (index = left; index <= right; index++) + if (ctx->tmp[index] == mid) + break; + + ctx->tmp[index] = ctx->tmp[left]; + index = left; + for (i = left, j = right; i <= j;) + { + while ((i <= j) && (ctx->tmp[j] > mid)) j--; + if (i <= j) ctx->tmp[index] = ctx->tmp[j], index = j--; + while ((i <= j) && (ctx->tmp[i] < mid)) i++; + if (i <= j) ctx->tmp[index] = ctx->tmp[i], index = i++; + } + ctx->tmp[index] = mid; + + now = index - left + 1; + for (i = qleft; (i <= qright) && (ctx->ask[i] < now); i++); + for (j = i; j <= qright; j++) ctx->ask[j] -= now; + for (j = i; (j <= qright) && (ctx->ask[j] == 0); j++) ctx->ask[j]++; + insert_calc_queue(ctx, left, index - 1, qleft, i - 1, calc_tail); + insert_calc_queue(ctx, index, index, i, j - 1, calc_tail); + insert_calc_queue(ctx, index + 1, right, j, qright, calc_tail); + return; + } + + void calc(boost::shared_ptr& ctx) + { + if (_tail == 0) + return; + + int tmp_num = _tail > MAX_QUEUE_LENGTH ? 
MAX_QUEUE_LENGTH : _tail.load(); + for (int i = 0; i < tmp_num; i++) + ctx->tmp[i] = _samples[i]; + + ctx->ask[COUNTER_PERCENTILE_50] = (int)(tmp_num * 0.5) + 1; + ctx->ask[COUNTER_PERCENTILE_90] = (int)(tmp_num * 0.90) + 1; + ctx->ask[COUNTER_PERCENTILE_95] = (int)(tmp_num * 0.95) + 1; + ctx->ask[COUNTER_PERCENTILE_99] = (int)(tmp_num * 0.99) + 1; + ctx->ask[COUNTER_PERCENTILE_999] = (int)(tmp_num * 0.999) + 1; + // must be sorted + // std::sort(ctx->ask, ctx->ask + MAX_TYPE_NUMBER); + + int l, r = 0; + + insert_calc_queue(ctx, 0, tmp_num - 1, 0, COUNTER_PERCENTILE_COUNT - 1, r); + for (l = 1; l <= r; l++) + select(ctx, ctx->calc_queue[l][_LEFT], ctx->calc_queue[l][_RIGHT], ctx->calc_queue[l][_QLEFT], ctx->calc_queue[l][_QRIGHT], r); + + return; + } + + void on_timer(const boost::system::error_code& ec) // periodic tick: recompute percentiles, then re-arm the timer + { + if (!ec) + { + boost::shared_ptr ctx(new compute_context()); + calc(ctx); + + _timer.reset(new boost::asio::deadline_timer(shared_io_service::instance().ios)); + _timer->expires_from_now(boost::posix_time::seconds(_counter_computation_interval_seconds)); + _timer->async_wait(std::bind(&perf_counter_number_percentile::on_timer, this, std::placeholders::_1)); + } + else if (ec != boost::asio::error::operation_aborted) // the destructor's _timer->cancel() completes the wait with operation_aborted; that is a normal shutdown, not a failure + { + dassert(false, "on _timer error!!!"); + } + } + + std::shared_ptr _timer; + std::atomic _tail; + uint64_t _samples[MAX_QUEUE_LENGTH]; + uint64_t _results[COUNTER_PERCENTILE_COUNT]; + int _counter_computation_interval_seconds; + }; + + // ---------------------- perf counter dispatcher --------------------- + + simple_perf_counter::simple_perf_counter(const char *section, const char *name, perf_counter_type type) + : perf_counter(section, name, type) + { + if (type == perf_counter_type::COUNTER_TYPE_NUMBER) + _counter_impl = new perf_counter_number(section, name, type); + else if (type == perf_counter_type::COUNTER_TYPE_RATE) + _counter_impl = new perf_counter_rate(section, name, type); + else + _counter_impl = new perf_counter_number_percentile(section, name, type); + } + + 
simple_perf_counter::~simple_perf_counter(void) + { + delete _counter_impl; + } + } +} diff --git a/src/tools/common/simple_perf_counter.h b/src/tools/common/simple_perf_counter.h new file mode 100644 index 0000000000..63a33d72cd --- /dev/null +++ b/src/tools/common/simple_perf_counter.h @@ -0,0 +1,50 @@ +/* +* The MIT License (MIT) + +* Copyright (c) 2015 Microsoft Corporation, Robust Distributed System Nucleus(rDSN) + +* Permission is hereby granted, free of charge, to any person obtaining a copy +* of this software and associated documentation files (the "Software"), to deal +* in the Software without restriction, including without limitation the rights +* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: + +* The above copyright notice and this permission notice shall be included in +* all copies or substantial portions of the Software. + +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +* THE SOFTWARE. 
+*/ +#pragma once + +# include + +namespace dsn { + namespace tools { + + class simple_perf_counter : public perf_counter + { + public: + simple_perf_counter(const char *section, const char *name, perf_counter_type type); + ~simple_perf_counter(void); + + virtual void increment() { _counter_impl->increment(); } + virtual void decrement() { _counter_impl->decrement(); } + virtual void add(uint64_t val) { _counter_impl->add(val); } + virtual void set(uint64_t val) { _counter_impl->set(val); } + virtual double get_value() { return _counter_impl->get_value(); } + virtual double get_percentile(counter_percentile_type type) { return _counter_impl->get_percentile(type); } + + private: + perf_counter *_counter_impl; + }; + + } +} + diff --git a/src/tools/common/simple_task_queue.cpp b/src/tools/common/simple_task_queue.cpp new file mode 100644 index 0000000000..d07851154d --- /dev/null +++ b/src/tools/common/simple_task_queue.cpp @@ -0,0 +1,75 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +# include "shared_io_service.h" +# include "simple_task_queue.h" + + +# define __TITLE__ "task.queue.simple" + +namespace dsn { + namespace tools{ + simple_task_queue::simple_task_queue(task_worker_pool* pool, int index, task_queue* inner_provider) + : task_queue(pool, index, inner_provider), _samples("") + { + } + + void simple_task_queue::enqueue(task_ptr& task) + { + if (task->delay_milliseconds() == 0) + _samples.enqueue(task, task->spec().priority); + else + { + std::shared_ptr timer(new boost::asio::deadline_timer(shared_io_service::instance().ios)); + timer->expires_from_now(boost::posix_time::milliseconds(task->delay_milliseconds())); + task->set_delay(0); + + timer->async_wait([this, task, timer](const boost::system::error_code& ec) + { + if (!ec) + { + task->enqueue(); + } + else + { + dfatal("delayed execution failed for task %s, err = %u", + task->spec().name, ec.value()); + } + }); + } + } + + task_ptr simple_task_queue::dequeue() + { + int c = 0; + return _samples.dequeue(c); + } + + int simple_task_queue::count() const + { + return _samples.count(); + } + } +} diff --git a/src/tools/common/simple_task_queue.h b/src/tools/common/simple_task_queue.h new file mode 100644 index 0000000000..9590b0a65c --- /dev/null +++ b/src/tools/common/simple_task_queue.h @@ -0,0 +1,47 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, 
merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#pragma once + +# include +# include + +namespace dsn { + namespace tools { + class simple_task_queue : public task_queue + { + public: + simple_task_queue(task_worker_pool* pool, int index, task_queue* inner_provider); + + virtual void enqueue(task_ptr& task); + virtual task_ptr dequeue(); + virtual int count() const; + + private: + typedef utils::blocking_priority_queue tqueue; + tqueue _samples; + }; + } +} diff --git a/src/tools/common/tracer.cpp b/src/tools/common/tracer.cpp new file mode 100644 index 0000000000..36c0228641 --- /dev/null +++ b/src/tools/common/tracer.cpp @@ -0,0 +1,258 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to 
the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include + +#define __TITLE__ "toollet.tracer" + +namespace dsn { + namespace tools { + + static void tracer_on_task_enqueue(task* caller, task* callee) + { + ddebug("%s ENQUEUE, task_id = %016llx", + callee->spec().name, + callee->id() + ); + } + + static void tracer_on_task_begin(task* this_) + { + switch (this_->spec().type) + { + case task_type::TASK_TYPE_COMPUTE: + case task_type::TASK_TYPE_AIO: + ddebug("%s EXEC BEGIN, task_id = %016llx", + this_->spec().name, + this_->id() + ); + break; + case task_type::TASK_TYPE_RPC_REQUEST: + { + auto tsk = (rpc_request_task*)this_; + ddebug("%s EXEC BEGIN, task_id = %016llx, %s:%d => %s:%d, rpc_id = %016llx", + this_->spec().name, + this_->id(), + tsk->get_request()->header().from_address.name.c_str(), + static_cast(tsk->get_request()->header().from_address.port), + tsk->get_request()->header().to_address.name.c_str(), + static_cast(tsk->get_request()->header().to_address.port), + tsk->get_request()->header().rpc_id + ); + } + break; + case task_type::TASK_TYPE_RPC_RESPONSE: + { + auto tsk = (rpc_response_task*)this_; + ddebug("%s EXEC BEGIN, task_id = %016llx, %s:%d => %s:%d, rpc_id = %016llx", + this_->spec().name, + this_->id(), + tsk->get_request()->header().to_address.name.c_str(), + static_cast(tsk->get_request()->header().to_address.port), + 
tsk->get_request()->header().from_address.name.c_str(), + static_cast(tsk->get_request()->header().from_address.port), + tsk->get_request()->header().rpc_id + ); + } + break; + } + } + + static void tracer_on_task_end(task* this_) + { + ddebug("%s EXEC END, task_id = %016llx, err = %s", + this_->spec().name, + this_->id(), + this_->error().to_string() + ); + } + + static void tracer_on_task_cancelled(task* this_) + { + ddebug("%s CANCELLED, task_id = %016llx", + this_->spec().name, + this_->id() + ); + } + + static void tracer_on_task_wait_pre(task* caller, task* callee, uint32_t timeout_ms) + { + + } + + static void tracer_on_task_wait_post(task* caller, task* callee, bool succ) + { + + } + + static void tracer_on_task_cancel_post(task* caller, task* callee, bool succ) + { + + } + + // return true means continue, otherwise early terminate with task::set_error_code + static void tracer_on_aio_call(task* caller, aio_task* callee) + { + ddebug("%s AIO.CALL, task_id = %016llx", + callee->spec().name, + callee->id() + ); + } + + static void tracer_on_aio_enqueue(aio_task* this_) + { + ddebug("%s AIO.ENQUEUE, task_id = %016llx", + this_->spec().name, + this_->id() + ); + } + + // return true means continue, otherwise early terminate with task::set_error_code + static void tracer_on_rpc_call(task* caller, message* req, rpc_response_task* callee) + { + message_header& hdr = req->header(); + + char str[24]; + ::dsn::utils::time_ms_to_string(hdr.client.timeout_ts_us/1000, str); + + ddebug( + "%s RPC.CALL: %s:%d => %s:%d, rpc_id = %016llx, callback_task = %016llx, timeout @ %s", + hdr.rpc_name, + hdr.from_address.name.c_str(), + static_cast(hdr.from_address.port), + hdr.to_address.name.c_str(), + static_cast(hdr.to_address.port), + hdr.rpc_id, + callee ? 
callee->id() : 0, + str + ); + } + + static void tracer_on_rpc_request_enqueue(rpc_request_task* callee) + { + ddebug("%s RPC.REQUEST.ENQUEUE, task_id = %016llx, %s:%d => %s:%d, rpc_id = %016llx", + callee->spec().name, + callee->id(), + callee->get_request()->header().from_address.name.c_str(), + static_cast(callee->get_request()->header().from_address.port), + callee->get_request()->header().to_address.name.c_str(), + static_cast(callee->get_request()->header().to_address.port), + callee->get_request()->header().rpc_id + ); + } + + // return true means continue, otherwise early terminate with task::set_error_code + static void tracer_on_rpc_reply(task* caller, message* msg) + { + message_header& hdr = msg->header(); + + ddebug( + "%s RPC.REPLY: %s:%d => %s:%d, rpc_id = %016llx", + hdr.rpc_name, + hdr.from_address.name.c_str(), + static_cast(hdr.from_address.port), + hdr.to_address.name.c_str(), + static_cast(hdr.to_address.port), + hdr.rpc_id + ); + } + + static void tracer_on_rpc_response_enqueue(rpc_response_task* resp) + { + ddebug("%s RPC.RESPONSE.ENQUEUE, task_id = %016llx, %s:%d => %s:%d, rpc_id = %016llx", + resp->spec().name, + resp->id(), + resp->get_request()->header().to_address.name.c_str(), + static_cast(resp->get_request()->header().to_address.port), + resp->get_request()->header().from_address.name.c_str(), + static_cast(resp->get_request()->header().from_address.port), + resp->get_request()->header().rpc_id + ); + } + + void tracer::install(service_spec& spec) + { + auto trace = config()->get_value("task.default", "is_trace", false); + + for (int i = 0; i <= task_code::max_value(); i++) + { + if (i == TASK_CODE_INVALID) + continue; + + std::string section_name = std::string("task.") + std::string(task_code::to_string(i)); + task_spec* spec = task_spec::get(i); + dassert (spec != nullptr, "task_spec cannot be null"); + + if (!config()->get_value(section_name.c_str(), "is_trace", trace)) + continue; + + if (config()->get_value(section_name.c_str(), 
"tracer::on_task_enqueue", true)) + spec->on_task_enqueue.put_back(tracer_on_task_enqueue, "tracer"); + + if (config()->get_value(section_name.c_str(), "tracer::on_task_begin", true)) + spec->on_task_begin.put_back(tracer_on_task_begin, "tracer"); + + if (config()->get_value(section_name.c_str(), "tracer::on_task_end", true)) + spec->on_task_end.put_back(tracer_on_task_end, "tracer"); + + //if (config()->get_value(section_name.c_str(), "tracer::on_task_cancelled", true)) + // spec->on_task_cancelled.put_back(tracer_on_task_cancelled, "tracer"); + + //if (config()->get_value(section_name.c_str(), "tracer::on_task_wait_pre", true)) + //spec->on_task_wait_pre.put_back(tracer_on_task_wait_pre, "tracer"); + + //if (config()->get_value(section_name.c_str(), "tracer::on_task_wait_post", true)) + //spec->on_task_wait_post.put_back(tracer_on_task_wait_post, "tracer"); + + //if (config()->get_value(section_name.c_str(), "tracer::on_task_cancel_post", true)) + //spec->on_task_cancel_post.put_back(tracer_on_task_cancel_post, "tracer"); + + if (config()->get_value(section_name.c_str(), "tracer::on_aio_call", true)) + spec->on_aio_call.put_back(tracer_on_aio_call, "tracer"); + + if (config()->get_value(section_name.c_str(), "tracer::on_aio_enqueue", true)) + spec->on_aio_enqueue.put_back(tracer_on_aio_enqueue, "tracer"); + + if (config()->get_value(section_name.c_str(), "tracer::on_rpc_call", true)) + spec->on_rpc_call.put_back(tracer_on_rpc_call, "tracer"); + + if (config()->get_value(section_name.c_str(), "tracer::on_rpc_request_enqueue", true)) + spec->on_rpc_request_enqueue.put_back(tracer_on_rpc_request_enqueue, "tracer"); + + if (config()->get_value(section_name.c_str(), "tracer::on_rpc_reply", true)) + spec->on_rpc_reply.put_back(tracer_on_rpc_reply, "tracer"); + + if (config()->get_value(section_name.c_str(), "tracer::on_rpc_response_enqueue", true)) + spec->on_rpc_response_enqueue.put_back(tracer_on_rpc_response_enqueue, "tracer"); + } + } + + tracer::tracer(const char* 
name) + : toollet(name) + { + } + } +} diff --git a/src/tools/simulator/CMakeLists.txt b/src/tools/simulator/CMakeLists.txt new file mode 100644 index 0000000000..2d138fae68 --- /dev/null +++ b/src/tools/simulator/CMakeLists.txt @@ -0,0 +1 @@ +dsn_add_library(dsn.tools.simulator) diff --git a/src/tools/simulator/diske.sim.cpp b/src/tools/simulator/diske.sim.cpp new file mode 100644 index 0000000000..659d7474e7 --- /dev/null +++ b/src/tools/simulator/diske.sim.cpp @@ -0,0 +1,53 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include "diske.sim.h" +#include + +#define __TITLE__ "aio_provider" + +namespace dsn { namespace tools { + +DEFINE_TASK_CODE(LPC_NATIVE_AIO_REDIRECT, TASK_PRIORITY_HIGH, THREAD_POOL_DEFAULT) + +sim_aio_provider::sim_aio_provider(disk_engine* disk, aio_provider* inner_provider) +: NATIVE_AIO_PROVIDER(disk, inner_provider) +{ +} + +sim_aio_provider::~sim_aio_provider(void) +{ +} + +void sim_aio_provider::aio(aio_task_ptr& aio) +{ + error_code err; + uint32_t bytes; + + err = aio_internal(aio, false, &bytes); + complete_io(aio, err, bytes, 0); +} + +}} // end namespace diff --git a/src/tools/simulator/diske.sim.h b/src/tools/simulator/diske.sim.h new file mode 100644 index 0000000000..d4814a1136 --- /dev/null +++ b/src/tools/simulator/diske.sim.h @@ -0,0 +1,48 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#pragma once + +#include +#if defined(_WIN32) +#define NATIVE_AIO_PROVIDER native_win_aio_provider +#include "../common/native_aio_provider.win.h" +#else +#define NATIVE_AIO_PROVIDER native_posix_aio_provider +#include "../common/native_aio_provider.posix.h" +#endif + +namespace dsn { namespace tools { + +class sim_aio_provider : public NATIVE_AIO_PROVIDER +{ +public: + sim_aio_provider(disk_engine* disk, aio_provider* inner_provider); + ~sim_aio_provider(void); + + virtual void aio(aio_task_ptr& aio); +}; + +}} // end namespace diff --git a/src/tools/simulator/env.sim.cpp b/src/tools/simulator/env.sim.cpp new file mode 100644 index 0000000000..f408bf10c2 --- /dev/null +++ b/src/tools/simulator/env.sim.cpp @@ -0,0 +1,76 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "env.sim.h" +#include "scheduler.h" + +#define __TITLE__ "env.provider.simulator" + +namespace dsn { namespace tools { + +/*static*/ int sim_env_provider::_seed; + +uint64_t sim_env_provider::now_ns() const +{ + return scheduler::instance().now_ns(); +} + +uint64_t sim_env_provider::random64(uint64_t min, uint64_t max) +{ + uint64_t gap = max - min + 1; + if (gap == 1) return min; + + uint64_t v = ((uint64_t)std::rand()); + v *= ((uint64_t)std::rand()); + v *= ((uint64_t)std::rand()); + v *= ((uint64_t)std::rand()); + v *= ((uint64_t)std::rand()); + v ^= ((uint64_t)std::rand()); + return gap == 0 ? (min + v) : (min + v % gap); +} + +void sim_env_provider::on_worker_start(task_worker* worker) +{ + std::srand((_seed + worker->index() + worker->index()*worker->pool_spec().pool_code) ^ worker->index()); +} + +sim_env_provider::sim_env_provider(env_provider* inner_provider) + : env_provider(inner_provider) +{ + task_worker::on_start.put_front(on_worker_start, "sim_env_provider::on_worker_start"); + + if (config()->get_value("tools.simulator", "use_given_random_seed", false)) + { + _seed = config()->get_value("tools.simulator", "random_seed", std::rand()); + } + else + { + _seed = static_cast(get_current_physical_time_ns()); + } + + derror("simulation.random seed for this round is %d", _seed); +} + +}} // end namespace diff --git a/src/tools/simulator/env.sim.h b/src/tools/simulator/env.sim.h new file mode 100644 index 0000000000..915e95050f --- /dev/null +++ b/src/tools/simulator/env.sim.h @@ -0,0 +1,46 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of 
charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#pragma once + +#include + +namespace dsn { namespace tools { + +class sim_env_provider : public env_provider +{ +public: + sim_env_provider(env_provider* inner_provider); + + // service local time (can be logical or physical) + virtual uint64_t now_ns() const; + virtual uint64_t random64(uint64_t min, uint64_t max); + +private: + static void on_worker_start(task_worker* worker); + static int _seed; +}; + +}} // end namespace diff --git a/src/tools/simulator/scheduler.cpp b/src/tools/simulator/scheduler.cpp new file mode 100644 index 0000000000..2fb71f7d9e --- /dev/null +++ b/src/tools/simulator/scheduler.cpp @@ -0,0 +1,223 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +# include "scheduler.h" +# include "env.sim.h" +# include +# include + +namespace dsn { namespace tools { + +void event_wheel::add_event(uint64_t ts, task_ptr& task) +{ + utils::auto_lock l(_lock); + + std::vector* evts; + auto itr = _events.find(ts); + if (itr != _events.end()) + evts = itr->second; + else + { + evts = new std::vector(); + _events.insert(std::make_pair(ts, evts)); + } + + evts->push_back(task); + +} + +std::vector* event_wheel::pop_next_events(__out_param uint64_t& ts) +{ + utils::auto_lock l(_lock); + + std::vector* evts = NULL; + auto itr = _events.begin(); + if (itr != _events.end()){ + evts = itr->second; + ts = itr->first; + _events.erase(itr); + } + return evts; +} + +void event_wheel::clear() +{ + utils::auto_lock l(_lock); + _events.clear(); +} + +////////////////////////////////////////////////////////////////////////////////////////////// + +scheduler::scheduler(void) +{ + _time_ns = 0; + _running = false; + task_worker::on_create.put_back(on_task_worker_create, "simulation.on_task_worker_create"); + task_worker::on_start.put_back(on_task_worker_start, "simulation.on_task_worker_start"); + + for (int i = 0; i <= task_code::max_value(); i++) + { + task_spec::get(i)->on_task_wait_pre.put_back(scheduler::on_task_wait, "simulation.on_task_wait"); + task_spec::get(i)->on_task_end.put_back(scheduler::on_task_end, "simulation.on_task_end"); + } + + task_ext::register_ext(task_state_ext::deletor); + task_worker_ext::register_ext(sim_worker_state::deletor); +} + +scheduler::~scheduler(void) +{ +} + + +/*static*/ void scheduler::on_task_worker_start(task_worker* worker) +{ + while (!scheduler::instance()._running) + { + std::this_thread::sleep_for(std::chrono::milliseconds(1000)); + } +} + +/*static*/ void scheduler::on_task_worker_create(task_worker* worker) +{ + auto s = task_worker_ext::get_inited(worker); + s->worker = worker; + s->first_time_schedule = true; + s->in_continuation = false; + s->index = 
static_cast(scheduler::instance()._threads.size()); + scheduler::instance()._threads.push_back(s); +} + +/*static*/ void scheduler::on_task_wait(task* waitor, task* waitee, uint32_t timeout_milliseconds) +{ + if (waitor == nullptr) + return; + + if (waitee->state() < task_state::TASK_STATE_FINISHED) + { + auto ts = task_ext::get_inited(waitee); + ts->wait_threads.push_back(task_worker_ext::get(task::get_current_worker())); + + scheduler::instance().wait_schedule(true, false); + } + else + { + scheduler::instance().wait_schedule(true, true); + } +} + +/*static*/ void scheduler::on_task_end(task* task) +{ + auto ts = task_ext::get(task); + if (ts != nullptr) + { + for (auto& w : ts->wait_threads) + { + w->is_continuation_ready = true; + } + } +} + +void scheduler::add_task(task_ptr& tsk, task_queue* q) +{ + auto ts = task_ext::get_inited(tsk.get()); + ts->queue = q; + + auto delay = (uint64_t)tsk->delay_milliseconds() * 1000000; + tsk->set_delay(0); + _wheel.add_event(now_ns() + delay, tsk); +} + +void scheduler::wait_schedule(bool in_continue, bool is_continue_ready /*= false*/) +{ + auto s = task_worker_ext::get(task::get_current_worker()); + s->in_continuation = in_continue; + s->is_continuation_ready = is_continue_ready; + + if (s->first_time_schedule) + { + s->first_time_schedule = false; + if (s->index == 0) + schedule(); + } + else + { + schedule(); + } + s->runnable.wait(TIME_MS_MAX); +} + +void scheduler::schedule() +{ + while (true) + { + // run ready workers whenever possible + std::vector ready_workers; + for (auto& s : _threads) + { + if ((s->in_continuation && s->is_continuation_ready) + || (!s->in_continuation && s->worker->queue()->count() > 0) + ) + { + ready_workers.push_back(s->index); + } + } + + if (ready_workers.size() > 0) + { + int i = dsn::service::env::random32(0, (uint32_t)ready_workers.size() - 1); + _threads[ready_workers[i]]->runnable.release(); + return; + } + + // otherwise, run the timed tasks + uint64_t ts = 0; + auto events = 
_wheel.pop_next_events(ts); + if (events) + { + { + utils::auto_lock l(_lock); + _time_ns = ts; + } + + // randomize the events, and see + std::random_shuffle(events->begin(), events->end(), [](int n) { return dsn::service::env::random32(0, n - 1); }); + + for (auto it = events->begin(); it != events->end(); it++) + { + ::dsn::service::tasking::enqueue(*it); + } + + delete events; + continue; + } + + // wait a moment + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + } +} + + +}} // end namespace diff --git a/src/tools/simulator/scheduler.h b/src/tools/simulator/scheduler.h new file mode 100644 index 0000000000..3017e98ae2 --- /dev/null +++ b/src/tools/simulator/scheduler.h @@ -0,0 +1,114 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#pragma once + +#include +#include + +namespace dsn { namespace tools { + +class event_wheel +{ +public: + ~event_wheel() { clear(); } + + void add_event(uint64_t ts, task_ptr& task); + std::vector* pop_next_events(__out_param uint64_t& ts); + void clear(); + bool has_more_events() const { utils::auto_lock l(_lock); return _events.size() > 0; } + +private: + typedef std::map*> Events; + Events _events; + mutable std::recursive_mutex _lock; +}; + +struct sim_worker_state +{ + utils::semaphore runnable; + int index; + task_worker *worker; + bool first_time_schedule; + bool in_continuation; + bool is_continuation_ready; + + static void deletor(void* p) + { + delete (sim_worker_state*)p; + } +}; + +class scheduler : public utils::singleton +{ +public: + scheduler(void); + ~scheduler(void); + + void start() { _running = true; } + uint64_t now_ns() const { utils::auto_lock l(_lock); return _time_ns; } + + void reset(); + void add_task(task_ptr& task, task_queue* q); + void wait_schedule(bool in_continue, bool is_continue_ready = false); + +public: + struct task_state_ext + { + task_queue *queue; + std::list wait_threads; + + static void deletor(void* p) + { + delete (task_state_ext*)p; + } + }; + typedef object_extension_helper task_worker_ext; + typedef object_extension_helper task_ext; + +private: + event_wheel _wheel; + mutable std::recursive_mutex _lock; + uint64_t _time_ns; + bool _running; + std::vector _threads; + +private: + void schedule(); + + static void on_task_worker_create(task_worker* worker); + static void on_task_worker_start(task_worker* worker); + static void on_task_wait(task* waitor, task* waitee, uint32_t timeout_milliseconds); + static void on_task_end(task* task); +}; + +// ------------------ inline implementation ---------------------------- + +inline void scheduler::reset() +{ + _wheel.clear(); +} + +}} // end namespace diff --git a/src/tools/simulator/simulator.cpp b/src/tools/simulator/simulator.cpp new file mode 100644 index 
0000000000..2bc3de51bc --- /dev/null +++ b/src/tools/simulator/simulator.cpp @@ -0,0 +1,92 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include +#include "scheduler.h" +#include + +#include "diske.sim.h" +#include "env.sim.h" +#include "task_engine.sim.h" + +namespace dsn { namespace tools { + +void simulator::install(service_spec& spec) +{ + register_common_providers(); + + register_component_provider("dsn::tools::sim_aio_provider"); + register_component_provider("dsn::tools::sim_env_provider"); + register_component_provider("dsn::tools::sim_task_queue"); + register_component_provider("dsn::tools::sim_semaphore_provider"); + + scheduler::instance(); + + if (spec.aio_factory_name == "") + spec.aio_factory_name = ("dsn::tools::sim_aio_provider"); + + if (spec.env_factory_name == "") + spec.env_factory_name = ("dsn::tools::sim_env_provider"); + + network_config_spec_default cs; + cs.factory_name = "dsn::tools::sim_network_provider"; + cs.message_buffer_block_size = 1024 * 64; + spec.network_default_configs[RPC_CHANNEL_TCP] = cs; + spec.network_default_configs[RPC_CHANNEL_UDP] = cs; + + if (spec.perf_counter_factory_name == "") + spec.perf_counter_factory_name = "dsn::tools::simple_perf_counter"; + + if (spec.logging_factory_name == "") + spec.logging_factory_name = "dsn::tools::simple_logger"; + + if (spec.lock_factory_name == "") + spec.lock_factory_name = ("dsn::tools::std_lock_provider"); + + if (spec.rwlock_factory_name == "") + spec.rwlock_factory_name = ("dsn::tools::std_rwlock_provider"); + + if (spec.semaphore_factory_name == "") + spec.semaphore_factory_name = ("dsn::tools::sim_semaphore_provider"); + + for (auto it = spec.threadpool_specs.begin(); it != spec.threadpool_specs.end(); it++) + { + threadpool_spec& tspec = *it; + + if (tspec.worker_factory_name == "") + tspec.worker_factory_name = ("dsn::task_worker"); + + if (tspec.queue_factory_name == "") + tspec.queue_factory_name = ("dsn::tools::sim_task_queue"); + } +} + +void simulator::run() +{ + scheduler::instance().start(); + tool_app::run(); +} + +}} // end namespace dsn::tools diff --git 
a/src/tools/simulator/task_engine.sim.cpp b/src/tools/simulator/task_engine.sim.cpp new file mode 100644 index 0000000000..fbc3195c93 --- /dev/null +++ b/src/tools/simulator/task_engine.sim.cpp @@ -0,0 +1,106 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include "task_engine.sim.h" +#include "scheduler.h" + +namespace dsn { namespace tools { + +sim_task_queue::sim_task_queue(task_worker_pool* pool, int index, task_queue* inner_provider) +: task_queue(pool, index, inner_provider) +{ +} + +void sim_task_queue::enqueue(task_ptr& task) +{ + if (0 == task->delay_milliseconds()) + { + if (_tasks.size() > 0) + { + do { + int random_pos = ::dsn::service::env::random32(0, 1000000); + auto pr = _tasks.insert(std::map::value_type(random_pos, task)); + if (pr.second) break; + } while (true); + } + else + { + int random_pos = ::dsn::service::env::random32(0, 1000000); + _tasks.insert(std::map::value_type(random_pos, task)); + } + } + else + { + scheduler::instance().add_task(task, this); + } +} + +task_ptr sim_task_queue::dequeue() +{ + scheduler::instance().wait_schedule(false); + + if (_tasks.size() > 0) + { + task_ptr t = _tasks.begin()->second; + _tasks.erase(_tasks.begin()); + return t; + } + else + { + return nullptr; + } +} + +void sim_semaphore_provider::signal(int count) +{ + _count += count; + + while (!_wait_threads.empty() && _count > 0) + { + --_count; + + sim_worker_state* thread = _wait_threads.front(); + _wait_threads.pop_front(); + thread->is_continuation_ready = true; + } +} + +bool sim_semaphore_provider::wait(int timeout_milliseconds) +{ + if (_count > 0) + { + --_count; + scheduler::instance().wait_schedule(true, true); + return true; + } + else + { + _wait_threads.push_back(scheduler::task_worker_ext::get(task::get_current_worker())); + scheduler::instance().wait_schedule(true, false); + return true; + } +} + +}} // end namespace diff --git a/src/tools/simulator/task_engine.sim.h b/src/tools/simulator/task_engine.sim.h new file mode 100644 index 0000000000..4c46b2c049 --- /dev/null +++ b/src/tools/simulator/task_engine.sim.h @@ -0,0 +1,64 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus (rDSN) -=- + * + * Permission is 
hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#pragma once + +#include +#include + +namespace dsn { namespace tools { + +class sim_task_queue : public task_queue +{ +public: + sim_task_queue(task_worker_pool* pool, int index, task_queue* inner_provider); + + virtual void enqueue(task_ptr& task); + virtual task_ptr dequeue(); + virtual int count() const { return static_cast(_tasks.size()); } + +private: + std::map _tasks; +}; + +struct sim_worker_state; +class sim_semaphore_provider : public semaphore_provider +{ +public: + sim_semaphore_provider(dsn::service::zsemaphore *sema, int initialCount, semaphore_provider *inner_provider) + : semaphore_provider(sema, initialCount, inner_provider), _count(initialCount) + { + } + +public: + virtual void signal(int count); + virtual bool wait(int timeout_milliseconds); + +private: + int _count; + std::list _wait_threads; +}; + +}} // end namespace diff --git a/tutorial/counter.proto b/tutorial/counter.proto new file mode 100644 index 0000000000..cb88475f30 --- /dev/null +++ b/tutorial/counter.proto @@ -0,0 +1,24 @@ +package dsn.example; + +message count_op +{ + required string name = 1; + required int32 operand = 2; +} + +message count_name +{ + required string name = 1; +} + +message count_result +{ + required int32 value = 1; +} + +service counter +{ + rpc add (count_op) returns (count_result); + rpc read (count_name) returns (count_result); +} + diff --git a/tutorial/counter.proto.annotations b/tutorial/counter.proto.annotations new file mode 100644 index 0000000000..001b55558e --- /dev/null +++ b/tutorial/counter.proto.annotations @@ -0,0 +1,12 @@ +; annotation format +;[type.name[[.subname]...]] +;key = value + +[service.counter] +stateful = true ; counter is a stateful service + +[function.counter.add] +write = true ; counter.add is a write function + +[function.counter.read] +write = false diff --git a/tutorial/counter.replication/counter.server.impl.cpp b/tutorial/counter.replication/counter.server.impl.cpp new file mode 100644 index 0000000000..23366c1c34
--- /dev/null +++ b/tutorial/counter.replication/counter.server.impl.cpp @@ -0,0 +1,251 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus(rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +# include "counter.server.impl.h" +# include + +namespace dsn { + namespace example { + + + counter_service_impl::counter_service_impl(replica* replica, configuration_ptr& config) + : counter_service(replica, config) + { + } + + void counter_service_impl::on_add(const ::dsn::example::count_op& op, ::dsn::service::rpc_replier& reply) + { + zauto_lock l(_lock); + auto rt = _counters[op.name] += op.operand; + reply(rt); + } + + void counter_service_impl::on_read(const std::string& name, ::dsn::service::rpc_replier& reply) + { + zauto_lock l(_lock); + + auto it = _counters.find(name); + if (it == _counters.end()) + { + reply(0); + } + else + { + reply(it->second); + } + } + + int counter_service_impl::open(bool create_new) + { + zauto_lock l(_lock); + if (create_new) + { + boost::filesystem::remove_all(dir()); + boost::filesystem::create_directory(dir()); + } + else + { + recover(); + } + return 0; + } + + int counter_service_impl::close(bool clear_state) + { + zauto_lock l(_lock); + if (clear_state) + { + boost::filesystem::remove_all(dir()); + } + return 0; + } + + // checkpoint related + void counter_service_impl::recover() + { + zauto_lock l(_lock); + + _counters.clear(); + + decree max_ver = 0; + std::string name; + boost::filesystem::directory_iterator end_it; + for (boost::filesystem::directory_iterator it(dir()); + it != end_it; + ++it) + { + auto s = it->path().filename().string(); + if (s.substr(0, strlen("checkpoint.")) != std::string("checkpoint.")) + continue; + + decree version = atol(s.substr(strlen("checkpoint.")).c_str()); + if (version > max_ver) + { + max_ver = version; + name = dir() + "/" + s; + } + } + + if (max_ver > 0) + { + recover(name, max_ver); + } + } + + void counter_service_impl::recover(const std::string& name, decree version) + { + zauto_lock l(_lock); + + std::ifstream is(name.c_str()); + if (!is.is_open()) + return; + + + _counters.clear(); + + uint32_t count; + is.read((char*)&count, sizeof(count)); + + for (uint32_t i = 0; i 
< count; i++) + { + std::string key; + int32_t value; + + uint32_t sz; + is.read((char*)&sz, (uint32_t)sizeof(sz)); + key.resize(sz); + is.read((char*)&key[0], sz); + + is.read((char*)&value, sizeof(value)); + + _counters[key] = value; + } + + _last_durable_decree = _last_committed_decree = version; + } + + int counter_service_impl::flush(bool force) + { + zauto_lock l(_lock); + + if (last_committed_decree() == last_durable_decree()) + { + return ERR_SUCCESS; + } + + // TODO: should use async write instead + char name[256]; + sprintf(name, "%s/checkpoint.%lld", dir().c_str(), + static_cast(last_committed_decree())); + std::ofstream os(name); + + uint32_t count = (uint32_t)_counters.size(); + os.write((const char*)&count, (uint32_t)sizeof(count)); + + for (auto it = _counters.begin(); it != _counters.end(); it++) + { + const std::string& k = it->first; + uint32_t sz = (uint32_t)k.length(); + + os.write((const char*)&sz, (uint32_t)sizeof(sz)); + os.write((const char*)&k[0], sz); + os.write((const char*)&it->second, sizeof(int32_t)); + } + + _last_durable_decree = last_committed_decree(); + return ERR_SUCCESS; + } + + // helper routines to accelerate learning + int counter_service_impl::get_learn_state(decree start, const blob& learn_request, __out_param learn_state& state) + { + ::dsn::binary_writer writer; + + zauto_lock l(_lock); + + int magic = 0xdeadbeef; + writer.write(magic); + + writer.write(_last_committed_decree.load()); + + dassert(_last_committed_decree >= 0, ""); + + int count = static_cast(_counters.size()); + writer.write(count); + + for (auto it = _counters.begin(); it != _counters.end(); it++) + { + writer.write(it->first); + writer.write(it->second); + } + + auto bb = writer.get_buffer(); + auto buf = bb.buffer(); + + state.meta = blob(buf, static_cast(bb.data() - bb.buffer().get()), bb.length()); + + return ERR_SUCCESS; + } + + int counter_service_impl::apply_learn_state(learn_state& state) + { + blob bb((const char*)state.meta.data(), 0, 
state.meta.length()); + + binary_reader reader(bb); + + zauto_lock l(_lock); + + _counters.clear(); + + int magic; + reader.read(magic); + + dassert(magic == 0xdeadbeef, ""); + + decree decree; + reader.read(decree); + + dassert(decree >= 0, ""); + + int count; + reader.read(count); + + for (int i = 0; i < count; i++) + { + std::string key; + int32_t value; + reader.read(key); + reader.read(value); + _counters[key] = value; + } + + _last_committed_decree = decree; + _last_durable_decree = 0; + + return flush(true); + } + } +} \ No newline at end of file diff --git a/tutorial/counter.replication/counter.server.impl.h b/tutorial/counter.replication/counter.server.impl.h new file mode 100644 index 0000000000..104a9b2d63 --- /dev/null +++ b/tutorial/counter.replication/counter.server.impl.h @@ -0,0 +1,71 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2015 Microsoft Corporation + * + * -=- Robust Distributed System Nucleus(rDSN) -=- + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +# pragma once + +# include "counter.server.h" + +namespace dsn { + namespace example { + + using namespace ::dsn::replication; + + class counter_service_impl + : public counter_service + { + public: + counter_service_impl(replica* replica, configuration_ptr& config); + + virtual void on_add(const ::dsn::example::count_op& op, ::dsn::service::rpc_replier& reply); + virtual void on_read(const std::string& name, ::dsn::service::rpc_replier& reply); + + // + // interfaces to be implemented by app + // all return values are error code + // + virtual int open(bool create_new); // single threaded + virtual int close(bool clear_state); // must be thread-safe + + // update _last_durable_decree internally + virtual int flush(bool force); // must be thread-safe + + // + // helper routines to accelerate learning + // + virtual int get_learn_state(decree start, const blob& learn_request, __out_param learn_state& state); // must be thread-safe + virtual int apply_learn_state(learn_state& state); // must be thread-safe, and last_committed_decree must equal to last_durable_decree after learning + + private: + void recover(); + void recover(const std::string& name, decree version); + + private: + ::dsn::service::zlock _lock; + std::map _counters; + }; + } +} + diff --git a/tutorial/counter.thrift b/tutorial/counter.thrift new file mode 100644 index 0000000000..166775fac9 --- /dev/null +++ b/tutorial/counter.thrift @@ -0,0 +1,13 @@ +namespace cpp dsn.example + +struct count_op +{ + 1: string name; + 2: i32 operand; +} + +service counter +{ + i32 add(1:count_op op); + i32 read(1:string name); +} diff --git a/tutorial/counter.thrift.annotations b/tutorial/counter.thrift.annotations new file mode 100644
index 0000000000..001b55558e --- /dev/null +++ b/tutorial/counter.thrift.annotations @@ -0,0 +1,12 @@ +; annotation format +;[type.name[[.subname]...]] +;key = value + +[service.counter] +stateful = true ; counter is a stateful service + +[function.counter.add] +write = true ; counter.add is a write function + +[function.counter.read] +write = false