diff --git a/CMakeLists.txt b/CMakeLists.txt index 107f857..219a421 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -126,7 +126,6 @@ add_library(simsycl include/simsycl/sycl/usm.hh include/simsycl/sycl/vec.hh include/simsycl/system.hh - include/simsycl/templates.hh ${CONFIG_PATH} src/simsycl/check.cc src/simsycl/context.cc @@ -136,7 +135,6 @@ add_library(simsycl src/simsycl/queue.cc src/simsycl/system.cc src/simsycl/system_config.cc - src/simsycl/templates.cc ) target_link_libraries(simsycl PRIVATE Boost::context diff --git a/include/simsycl/system.hh b/include/simsycl/system.hh index f625f90..d9290f6 100644 --- a/include/simsycl/system.hh +++ b/include/simsycl/system.hh @@ -114,6 +114,10 @@ struct system_config { std::unordered_map devices{}; }; +extern const platform_config default_platform; +extern const device_config default_device; +extern const system_config default_system; + void configure_system(const system_config &system); } // namespace simsycl diff --git a/include/simsycl/templates.hh b/include/simsycl/templates.hh deleted file mode 100644 index c0a9997..0000000 --- a/include/simsycl/templates.hh +++ /dev/null @@ -1,15 +0,0 @@ -#pragma once - -#include "sycl/forward.hh" - -namespace simsycl::templates::platform { - -extern const platform_config cuda_12_2; - -} - -namespace simsycl::templates::device::nvidia { - -extern const device_config rtx_3090; - -} diff --git a/src/simsycl/system.cc b/src/simsycl/system.cc index 87dce56..c473f9a 100644 --- a/src/simsycl/system.cc +++ b/src/simsycl/system.cc @@ -3,7 +3,6 @@ #include "simsycl/detail/check.hh" #include "simsycl/sycl/device.hh" #include "simsycl/sycl/platform.hh" -#include "simsycl/templates.hh" #include #include @@ -133,14 +132,7 @@ struct system_state { std::optional system; system_state &get_system() { - if(!detail::system.has_value()) { - system_config config; - config.platforms.emplace("CUDA", simsycl::templates::platform::cuda_12_2); - for(int i = 0; i < 4; ++i) { - config.devices.emplace("GPU#" + std::to_string(i), simsycl::templates::device::nvidia::rtx_3090); - } - configure_system(config); - } + if(!system.has_value()) { system.emplace(default_system); } return system.value(); } @@ -267,4 +259,106 @@ namespace simsycl { void configure_system(const system_config &system) { detail::system.emplace(system); } +const platform_config default_platform{ + .version = "0.1", + .name = "SimSYCL", + .vendor = "SimSYCL", + .extensions = {}, +}; + +const device_config default_device{ + .device_type = sycl::info::device_type::gpu, + .vendor_id = 0, + .max_compute_units = 16, + .max_work_item_dimensions = 3, + .max_work_item_sizes_1 = {1024}, + .max_work_item_sizes_2 = {1024, 1024}, + .max_work_item_sizes_3 = {64, 1024, 1024}, + .max_work_group_size = 1024, + .max_num_sub_groups = 32, + .sub_group_sizes = {32}, + .preferred_vector_width_char = 4, + .preferred_vector_width_short = 2, + .preferred_vector_width_int = 1, + .preferred_vector_width_long = 1, + .preferred_vector_width_float = 1, + .preferred_vector_width_double = 1, + .preferred_vector_width_half = 2, + .native_vector_width_char = 4, + .native_vector_width_short = 2, + .native_vector_width_int = 1, + .native_vector_width_long = 1, + .native_vector_width_float = 1, + .native_vector_width_double = 1, + .native_vector_width_half = 2, + .max_clock_frequency = 1000, + .address_bits = 64, + .max_mem_alloc_size = std::numeric_limits::max(), + .image_support = false, + .max_read_image_args = 0, + .max_write_image_args = 0, + .image2d_max_height = 0, + .image2d_max_width = 0, + .image3d_max_height = 0, + .image3d_max_width = 0, + .image3d_max_depth = 0, + .image_max_buffer_size = 0, + .max_samplers = 0, + .max_parameter_size = std::numeric_limits::max(), + .mem_base_addr_align = 8, + .half_fp_config + = {sycl::info::fp_config::denorm, sycl::info::fp_config::inf_nan, sycl::info::fp_config::round_to_nearest, + sycl::info::fp_config::round_to_zero, sycl::info::fp_config::round_to_inf, sycl::info::fp_config::fma, + sycl::info::fp_config::correctly_rounded_divide_sqrt}, + .single_fp_config + = {sycl::info::fp_config::denorm, sycl::info::fp_config::inf_nan, sycl::info::fp_config::round_to_nearest, + sycl::info::fp_config::round_to_zero, sycl::info::fp_config::round_to_inf, sycl::info::fp_config::fma, + sycl::info::fp_config::correctly_rounded_divide_sqrt}, + .double_fp_config + = {sycl::info::fp_config::denorm, sycl::info::fp_config::inf_nan, sycl::info::fp_config::round_to_nearest, + sycl::info::fp_config::round_to_zero, sycl::info::fp_config::round_to_inf, sycl::info::fp_config::fma, + sycl::info::fp_config::correctly_rounded_divide_sqrt}, + .global_mem_cache_type = sycl::info::global_mem_cache_type::read_write, + .global_mem_cache_line_size = 128, + .global_mem_cache_size = 16 << 20, + .global_mem_size = std::numeric_limits::max(), + .max_constant_buffer_size = 1 << 16, + .max_constant_args = std::numeric_limits::max(), + .local_mem_type = sycl::info::local_mem_type::local, + .local_mem_size = 64 << 10, + .error_correction_support = false, + .host_unified_memory = false, + .profiling_timer_resolution = 1, + .is_endian_little = true, + .is_available = true, + .is_compiler_available = true, + .is_linker_available = true, + .execution_capabilities = {sycl::info::execution_capability::exec_kernel}, + .queue_profiling = true, + .built_in_kernels = {}, + .platform_id = "SimSYCL", + .name = "SimSYCL virtual GPU", + .vendor = "SimSYCL", + .driver_version = "0.1", + .profile = "FULL_PROFILE", + .version = "0.1", + .aspects + = { sycl::aspect::gpu, sycl::aspect::accelerator, sycl::aspect::fp64, sycl::aspect::atomic64, + sycl::aspect::queue_profiling, sycl::aspect::usm_device_allocations, sycl::aspect::usm_host_allocations, + sycl::aspect::usm_shared_allocations, }, + .extensions = {}, + .printf_buffer_size = std::numeric_limits::max(), + .preferred_interop_user_sync = true, + .partition_max_sub_devices = 0, + .partition_properties = {}, + .partition_affinity_domains = {sycl::info::partition_affinity_domain::not_applicable}, + .partition_type_property = sycl::info::partition_property::no_partition, + .partition_type_affinity_domain = sycl::info::partition_affinity_domain::not_applicable, +}; + +const system_config default_system{ + .platforms = {{"SimSYCL", default_platform}}, + .devices = {{"GPU", default_device}}, +}; + } // namespace simsycl diff --git a/src/simsycl/templates.cc b/src/simsycl/templates.cc deleted file mode 100644 index 2da980c..0000000 --- a/src/simsycl/templates.cc +++ /dev/null @@ -1,110 +0,0 @@ -#include "simsycl/templates.hh" -#include "simsycl/sycl/device.hh" -#include "simsycl/sycl/platform.hh" -#include "simsycl/system.hh" - - -namespace simsycl::templates::platform { - -const platform_config cuda_12_2{ - .version = "12.2.0", - .name = "CUDA", - .vendor = "NVIDIA", - .extensions = {}, -}; - -} - -namespace simsycl::templates::device::nvidia { - -const device_config rtx_3090{ - .device_type = sycl::info::device_type::gpu, - .vendor_id = 4318, - .max_compute_units = 82, - .max_work_item_dimensions = 3, - .max_work_item_sizes_1 = {1024}, - .max_work_item_sizes_2 = {1024, 1024}, - .max_work_item_sizes_3 = {64, 1024, 1024}, - .max_work_group_size = 1024, - .max_num_sub_groups = 32, - .sub_group_sizes = {32}, - .preferred_vector_width_char = 4, - .preferred_vector_width_short = 2, - .preferred_vector_width_int = 1, - .preferred_vector_width_long = 1, - .preferred_vector_width_float = 1, - .preferred_vector_width_double = 1, - .preferred_vector_width_half = 2, - .native_vector_width_char = 4, - .native_vector_width_short = 2, - .native_vector_width_int = 1, - .native_vector_width_long = 1, - .native_vector_width_float = 1, - .native_vector_width_double = 1, - .native_vector_width_half = 2, - .max_clock_frequency = 1695, - .address_bits = 64, - .max_mem_alloc_size = 25438126080ull, - .image_support = false, - .max_read_image_args = 0, - .max_write_image_args = 0, - .image2d_max_height = 0, - .image2d_max_width = 0, - .image3d_max_height = 0, - .image3d_max_width = 0, - .image3d_max_depth = 0, - .image_max_buffer_size = 0, - .max_samplers = 0, - .max_parameter_size = 18446744073709551615ull, - .mem_base_addr_align = 8, - .half_fp_config - = {sycl::info::fp_config::denorm, sycl::info::fp_config::inf_nan, sycl::info::fp_config::round_to_nearest, - sycl::info::fp_config::round_to_zero, sycl::info::fp_config::round_to_inf, sycl::info::fp_config::fma, - sycl::info::fp_config::correctly_rounded_divide_sqrt}, - .single_fp_config - = {sycl::info::fp_config::denorm, sycl::info::fp_config::inf_nan, sycl::info::fp_config::round_to_nearest, - sycl::info::fp_config::round_to_zero, sycl::info::fp_config::round_to_inf, sycl::info::fp_config::fma, - sycl::info::fp_config::correctly_rounded_divide_sqrt}, - .double_fp_config - = {sycl::info::fp_config::denorm, sycl::info::fp_config::inf_nan, sycl::info::fp_config::round_to_nearest, - sycl::info::fp_config::round_to_zero, sycl::info::fp_config::round_to_inf, sycl::info::fp_config::fma, - sycl::info::fp_config::correctly_rounded_divide_sqrt}, - .global_mem_cache_type = sycl::info::global_mem_cache_type::read_write, - .global_mem_cache_line_size = 128, - .global_mem_cache_size = 6291456, - .global_mem_size = 25438126080, - .max_constant_buffer_size = 65536, - .max_constant_args = 4294967295, - .local_mem_type = sycl::info::local_mem_type::local, - .local_mem_size = 49152, - .error_correction_support = false, - .host_unified_memory = false, - .profiling_timer_resolution = 1, - .is_endian_little = true, - .is_available = true, - .is_compiler_available = true, - .is_linker_available = true, - .execution_capabilities = {sycl::info::execution_capability::exec_kernel}, - .queue_profiling = true, - .built_in_kernels = {}, - .platform_id = "CUDA", - .name = "NVIDIA GeForce RTX 3090", - .vendor = "NVIDIA", - .driver_version = "12010", - .profile = "FULL_PROFILE", - .version = "sm_86", - .aspects - = { sycl::aspect::gpu, sycl::aspect::accelerator, sycl::aspect::fp64, sycl::aspect::atomic64, - sycl::aspect::queue_profiling, sycl::aspect::usm_device_allocations, sycl::aspect::usm_host_allocations, - sycl::aspect::usm_shared_allocations, }, - .extensions = {}, - .printf_buffer_size = 18446744073709551615ull, - .preferred_interop_user_sync = true, - .partition_max_sub_devices = 0, - .partition_properties = {}, - .partition_affinity_domains = {sycl::info::partition_affinity_domain::not_applicable}, - .partition_type_property = sycl::info::partition_property::no_partition, - .partition_type_affinity_domain = sycl::info::partition_affinity_domain::not_applicable, -}; - -} // namespace simsycl::templates::device::nvidia diff --git a/test/launch_tests.cc b/test/launch_tests.cc index a5feb75..c2f637d 100644 --- a/test/launch_tests.cc +++ b/test/launch_tests.cc @@ -2,7 +2,6 @@ #include #include -#include #include @@ -81,12 +80,12 @@ TEMPLATE_TEST_CASE_SIG( TEST_CASE( "parallel_for(nd_range) correctly will re-use fibers and local allocations when the number of groups is large", "[launch]") { - simsycl::system_config system; - simsycl::device_config device = simsycl::templates::device::nvidia::rtx_3090; + simsycl::device_config device = simsycl::default_device; device.max_compute_units = 2; // we currently allocate #max_compute_units groups worth of fibers - system.platforms.emplace("CUDA", simsycl::templates::platform::cuda_12_2); - system.devices.emplace("GPU", device); - simsycl::configure_system(system); + simsycl::configure_system({ + .platforms = {{"SimSYCL", simsycl::default_platform}}, + .devices = {{"GPU", device}}, + }); sycl::range<1> global_range(256); sycl::range<1> local_range(16);