From 18070344d755ece04d169e6cc40775cae9288cee Mon Sep 17 00:00:00 2001 From: tbbdev Date: Wed, 18 Dec 2019 11:41:52 +0300 Subject: [PATCH] Committing TBB 2020 source code --- CHANGES | 20 +- Makefile | 16 +- README.md | 4 +- build/Makefile.tbbbind | 69 ++++ build/Makefile.test | 2 + build/index.html | 11 +- build/linux.inc | 7 + build/windows.inc | 7 + cmake/README.rst | 1 + cmake/templates/TBBConfig.cmake.in | 20 +- cmake/templates/TBBConfigInternal.cmake.in | 13 +- doc/Release_Notes.txt | 90 ++--- include/tbb/concurrent_hash_map.h | 4 +- include/tbb/flow_graph.h | 2 + include/tbb/info.h | 52 +++ include/tbb/internal/_aggregator_impl.h | 2 +- include/tbb/task_arena.h | 80 +++- include/tbb/tbb_config.h | 12 +- include/tbb/tbb_exception.h | 2 +- include/tbb/tbb_stddef.h | 6 +- src/Makefile | 19 +- src/perf/time_resumable_tasks.cpp | 2 +- src/tbb/arena.cpp | 21 +- src/tbb/arena.h | 5 + src/tbb/co_context.h | 2 +- src/tbb/governor.cpp | 148 +++++++ src/tbb/lin32-tbb-export.lst | 5 + src/tbb/lin32-tbbbind-export.def | 22 ++ src/tbb/lin64-tbb-export.lst | 5 + src/tbb/lin64-tbbbind-export.def | 22 ++ src/tbb/mac32-tbb-export.lst | 5 + src/tbb/mac64-tbb-export.lst | 5 + src/tbb/observer_proxy.cpp | 3 +- src/tbb/scheduler.h | 6 + src/tbb/task_group_context.cpp | 2 + src/tbb/tbb_misc.h | 30 ++ src/tbb/tbbbind.cpp | 309 +++++++++++++++ src/tbb/win32-tbb-export.lst | 5 + src/tbb/win32-tbbbind-export.def | 20 + src/tbb/win64-gcc-tbb-export.lst | 5 + src/tbb/win64-tbb-export.lst | 5 + src/tbb/win64-tbbbind-export.def | 20 + src/test/harness.h | 28 ++ src/test/test_aggregator.cpp | 5 +- src/test/test_arena_constraints_hwloc.cpp | 361 ++++++++++++++++++ src/test/test_arena_constraints_stubs.cpp | 41 ++ src/test/test_concurrent_associative_common.h | 6 +- src/test/test_join_node.cpp | 1 + src/test/test_resumable_tasks.cpp | 39 ++ src/test/test_source_node.cpp | 12 +- src/test/test_tbb_version.cpp | 4 +- 51 files changed, 1474 insertions(+), 109 deletions(-) create mode 100644 build/Makefile.tbbbind create mode 100644 include/tbb/info.h create mode 100644 src/tbb/lin32-tbbbind-export.def create mode 100644 src/tbb/lin64-tbbbind-export.def create mode 100644 src/tbb/tbbbind.cpp create mode 100644 src/tbb/win32-tbbbind-export.def create mode 100644 src/tbb/win64-tbbbind-export.def create mode 100644 src/test/test_arena_constraints_hwloc.cpp create mode 100644 src/test/test_arena_constraints_stubs.cpp diff --git a/CHANGES b/CHANGES index 79bb6754b8..cc5e720f2e 100644 --- a/CHANGES +++ b/CHANGES @@ -2,6 +2,21 @@ The list of most significant changes made over time in Intel(R) Threading Building Blocks (Intel(R) TBB). +Intel TBB 2020 +TBB_INTERFACE_VERSION == 11100 + +Changes (w.r.t. Intel TBB 2019 Update 9): + +- Extended task_arena interface to simplify development of NUMA-aware + applications. +- Added warning notifications when the deprecated functionality is used. + +Open-source contributions integrated: + +- Fixed various build warnings + (https://github.com/intel/tbb/pull/179) by Raf Schietekat. + +------------------------------------------------------------------------ Intel TBB 2019 Update 9 TBB_INTERFACE_VERSION == 11009 @@ -13,7 +28,7 @@ Changes (w.r.t. Intel TBB 2019 Update 8): Preview Features: -- Added isolated_task_group class that allows multiple threads to add +- Added isolated_task_group class that allows multiple threads to add and execute tasks sharing the same isolation. - Extended the flow graph API to simplify connecting nodes. 
- Added erase() by heterogeneous keys for concurrent ordered containers. @@ -27,6 +42,9 @@ Bugs fixed: - Fixed a bug in the merge() method of concurrent unordered containers. - Fixed behavior of a continue_node that follows buffering nodes. +- Fixed compilation error caused by missed stdlib.h when CMake + integration is used (https://github.com/intel/tbb/issues/195). + Inspired by Andrew Penkrat. Open-source contributions integrated: diff --git a/Makefile b/Makefile index 3603007ed7..116cf9edfc 100644 --- a/Makefile +++ b/Makefile @@ -13,6 +13,7 @@ # limitations under the License. tbb_root?=. +cfg?=release include $(tbb_root)/build/common.inc .PHONY: default all tbb tbbmalloc tbbproxy test examples @@ -25,25 +26,22 @@ default: tbb tbbmalloc $(if $(use_proxy),tbbproxy) all: tbb tbbmalloc tbbproxy test examples tbb: mkdir - $(MAKE) -C "$(work_dir)_debug" -r -f $(tbb_root)/build/Makefile.tbb cfg=debug $(MAKE) -C "$(work_dir)_release" -r -f $(tbb_root)/build/Makefile.tbb cfg=release tbbmalloc: mkdir - $(MAKE) -C "$(work_dir)_debug" -r -f $(tbb_root)/build/Makefile.tbbmalloc cfg=debug malloc $(MAKE) -C "$(work_dir)_release" -r -f $(tbb_root)/build/Makefile.tbbmalloc cfg=release malloc tbbproxy: mkdir - $(MAKE) -C "$(work_dir)_debug" -r -f $(tbb_root)/build/Makefile.tbbproxy cfg=debug tbbproxy $(MAKE) -C "$(work_dir)_release" -r -f $(tbb_root)/build/Makefile.tbbproxy cfg=release tbbproxy +tbbbind: mkdir + $(MAKE) -C "$(work_dir)_release" -r -f $(tbb_root)/build/Makefile.tbbbind cfg=release tbbbind + test: tbb tbbmalloc $(if $(use_proxy),tbbproxy) - -$(MAKE) -C "$(work_dir)_debug" -r -f $(tbb_root)/build/Makefile.tbbmalloc cfg=debug malloc_test - -$(MAKE) -C "$(work_dir)_debug" -r -f $(tbb_root)/build/Makefile.test cfg=debug -$(MAKE) -C "$(work_dir)_release" -r -f $(tbb_root)/build/Makefile.tbbmalloc cfg=release malloc_test -$(MAKE) -C "$(work_dir)_release" -r -f $(tbb_root)/build/Makefile.test cfg=release rml: mkdir - $(MAKE) -C "$(work_dir)_debug" -r -f $(tbb_root)/build/Makefile.rml cfg=debug $(MAKE) -C "$(work_dir)_release" -r -f $(tbb_root)/build/Makefile.rml cfg=release examples: tbb tbbmalloc @@ -60,8 +58,6 @@ doxygen: clean: clean_examples $(shell $(RM) $(work_dir)_release$(SLASH)*.* >$(NUL) 2>$(NUL)) $(shell $(RD) $(work_dir)_release >$(NUL) 2>$(NUL)) - $(shell $(RM) $(work_dir)_debug$(SLASH)*.* >$(NUL) 2>$(NUL)) - $(shell $(RD) $(work_dir)_debug >$(NUL) 2>$(NUL)) @echo clean done clean_examples: @@ -69,8 +65,7 @@ clean_examples: mkdir: $(shell $(MD) "$(work_dir)_release" >$(NUL) 2>$(NUL)) - $(shell $(MD) "$(work_dir)_debug" >$(NUL) 2>$(NUL)) - @echo Created $(work_dir)_release and ..._debug directories + @echo Created the $(work_dir)_release directory info: @echo OS: $(tbb_os) @@ -78,4 +73,3 @@ info: @echo compiler=$(compiler) @echo runtime=$(runtime) @echo tbb_build_prefix=$(tbb_build_prefix) - diff --git a/README.md b/README.md index 926a964e40..d2ebc0ac1d 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ -# Threading Building Blocks 2019 Update 9 -[![Stable release](https://img.shields.io/badge/version-2019_U9-green.svg)](https://github.com/intel/tbb/releases/tag/2019_U9) +# Threading Building Blocks 2020 +[![Stable release](https://img.shields.io/badge/version-2020-green.svg)](https://github.com/intel/tbb/releases/tag/2020) [![Apache License Version 2.0](https://img.shields.io/badge/license-Apache_2.0-green.svg)](LICENSE) Threading Building Blocks (TBB) lets you easily write parallel C++ programs that take diff --git a/build/Makefile.tbbbind b/build/Makefile.tbbbind new file 
mode 100644 index 0000000000..21a6f1bd6a --- /dev/null +++ b/build/Makefile.tbbbind @@ -0,0 +1,69 @@ +# Copyright (c) 2005-2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#------------------------------------------------------------------------------ +# Define rules for making the tbbbind shared library. +#------------------------------------------------------------------------------ + +tbb_root ?= "$(TBBROOT)" +BUILDING_PHASE=1 +include $(tbb_root)/build/common.inc +CPLUS_FLAGS += $(SDL_FLAGS) +DEBUG_SUFFIX=$(findstring _debug,_$(cfg)) + +#------------------------------------------------------------ +# Define static pattern rules dealing with .cpp source files +#------------------------------------------------------------ +$(warning CONFIG: cfg=$(cfg) arch=$(arch) compiler=$(compiler) target=$(target) runtime=$(runtime)) + +.PHONY: tbbbind +.PRECIOUS: %.$(OBJ) + +VPATH = $(tbb_root)/src/tbb/$(ASSEMBLY_SOURCE) $(tbb_root)/src/tbb $(tbb_root)/src/old $(tbb_root)/src/rml/client + +CPLUS_FLAGS += $(PIC_KEY) $(DSE_KEY) + +# Suppress superfluous warnings for tbbbind compilation +WARNING_KEY += $(WARNING_SUPPRESS) + +include $(tbb_root)/build/common_rules.inc + +TBBBIND.OBJ = tbbbind.$(OBJ) + +ifneq (,$(TBBBIND.DEF)) +tbbbind.def: $(TBBBIND.DEF) + $(CPLUS) $(PREPROC_ONLY) $< $(CPLUS_FLAGS) $(INCLUDES) > $@ + +LIB_LINK_FLAGS += $(EXPORT_KEY)tbbbind.def +$(TBBBIND.DLL): tbbbind.def +endif + +ifneq (,$(TBBBIND.DLL)) +$(TBBBIND.DLL): BUILDING_LIBRARY = $(TBBBIND.DLL) +$(TBBBIND.DLL): $(TBBBIND.OBJ) $(TBBBIND_NO_VERSION.DLL) + $(LIB_LINK_CMD) $(LIB_OUTPUT_KEY)$(TBBBIND.DLL) $(TBBBIND.OBJ) $(HWLOC.LIB) $(LINK_TBB.LIB) $(LIB_LINK_FLAGS) +endif + +ifneq (,$(TBBBIND_NO_VERSION.DLL)) +$(TBBBIND_NO_VERSION.DLL): + echo "INPUT ($(TBBBIND.DLL))" > $(TBBBIND_NO_VERSION.DLL) +endif + +tbbbind: $(TBB.DLL) $(TBBBIND.DLL) + +#clean: +# $(RM) *.$(OBJ) *.$(DLL) *.res *.map *.ilk *.pdb *.exp *.manifest *.tmp *.d core core.*[0-9][0-9] *.ver + +# Include automatically generated dependencies +-include *.d diff --git a/build/Makefile.test b/build/Makefile.test index 4eb0010a31..f63fc44681 100644 --- a/build/Makefile.test +++ b/build/Makefile.test @@ -252,6 +252,8 @@ endif test_opencl_node.$(TEST_EXT): LIBS += $(OPENCL.LIB) +test_arena_constraints_hwloc.$(TEST_EXT): LIBS += $(HWLOC.LIB) + $(TEST_TBB_PLAIN.EXE) $(TEST_TBB_SPECIAL.EXE): WARNING_KEY += $(TEST_WARNING_KEY) # Run tests that are in SCHEDULER_DIRECTLY_INCLUDED and TEST_TBB_PLAIN.EXE but not in skip_tests (which is specified by user) diff --git a/build/index.html b/build/index.html index ae047697da..d11f9a1d72 100644 --- a/build/index.html +++ b/build/index.html @@ -18,6 +18,9 @@

Files

Makefile.tbbmalloc
Main Makefile to build the Intel TBB scalable memory allocator library as well as its tests. Invoked via 'make tbbmalloc' from top-level Makefile. +
Makefile.tbbbind +
Main Makefile to build the tbbbind library. + Invoked via 'make tbbbind' from top-level Makefile.
Makefile.test
Main Makefile to build and run the tests for the Intel TBB library. Invoked via 'make test' from top-level Makefile. @@ -73,7 +76,7 @@

Software prerequisites:

  • Explicitly specify the architecture when invoking GNU make, e.g. make arch=ia32. -

    The default make target will build the release and debug versions of the Intel TBB library.

    +

    The default make target will build the release version of the Intel TBB library.

    Other targets are available in the top-level Makefile. You might find the following targets useful:

    • make test will build and run Intel TBB unit-tests; @@ -111,13 +114,15 @@

      Software prerequisites:

      cd src;make debug
      Build and test debug libraries only.
      make tbb -
      Make Intel TBB release and debug libraries. +
      Make Intel TBB release libraries.
      make tbbmalloc
      Make Intel TBB scalable memory allocator libraries. +
      make tbbbind +
      Make the tbbbind library.
      make test
Compile and run unit-tests.
      make examples -
      Build libraries and run all examples, like doing make debug clean release from the general example Makefile. +
      Build libraries and run all examples, like doing make clean release from the general example Makefile. Available in the open-source version only.
      make python
Build, install, and test Python* API for Intel TBB. See details here. diff --git a/build/linux.inc b/build/linux.inc index 4d59aaacf8..7be05c12d4 100644 --- a/build/linux.inc +++ b/build/linux.inc @@ -116,6 +116,13 @@ TBB.LIB = $(TBB.DLL) TBB_NO_VERSION.DLL=libtbb$(CPF_SUFFIX)$(DEBUG_SUFFIX).$(DLL) LINK_TBB.LIB = $(TBB_NO_VERSION.DLL) +TBBBIND_NO_VERSION.DLL = libtbbbind$(DEBUG_SUFFIX).$(DLL) +TBBBIND.DEF = $(tbb_root)/src/tbb/$(def_prefix)-tbbbind-export.def +TBBBIND.DLL = $(TBBBIND_NO_VERSION.DLL).$(SONAME_SUFFIX) +TBBBIND.LIB = $(TBBBIND_NO_VERSION.DLL) +LINK_TBBBIND.LIB = $(TBBBIND.LIB) +HWLOC.LIB = -lhwloc + MALLOC_NO_VERSION.DLL = libtbbmalloc$(DEBUG_SUFFIX).$(MALLOC_DLL) MALLOC.DEF = $(MALLOC_ROOT)/$(def_prefix)-tbbmalloc-export.def MALLOC.DLL = $(MALLOC_NO_VERSION.DLL).$(SONAME_SUFFIX) diff --git a/build/windows.inc b/build/windows.inc index 0d50e079e7..45c4c7bf85 100644 --- a/build/windows.inc +++ b/build/windows.inc @@ -79,6 +79,13 @@ ifneq ($(filter vc8 vc9,$(runtime)),) TBB.MANIFEST = tbbmanifest.exe.manifest endif +TBBBIND.DEF = $(tbb_root)/src/tbb/$(def_prefix)-tbbbind-export.def +TBBBIND.DLL = tbbbind$(DEBUG_SUFFIX).$(DLL) +TBBBIND.LIB = tbbbind$(DEBUG_SUFFIX).$(LIBEXT) +TBBBIND.RES = tbbbind.res +LINK_TBBBIND.LIB = $(TBBBIND.LIB) +HWLOC.LIB = libhwloc.$(LIBEXT) + MALLOC.DEF = $(MALLOC_ROOT)/$(def_prefix)-tbbmalloc-export.def MALLOC.DLL = tbbmalloc$(DEBUG_SUFFIX).$(DLL) MALLOC.LIB = tbbmalloc$(DEBUG_SUFFIX).$(LIBEXT) diff --git a/cmake/README.rst b/cmake/README.rst index 52cc46def8..6997a43754 100644 --- a/cmake/README.rst +++ b/cmake/README.rst @@ -197,6 +197,7 @@ If ``tbbmalloc_proxy`` is requested, ``tbbmalloc`` component will also be added TBBConfig creates `imported targets `_ as shared libraries using the following format: ``TBB::<component>`` (for example, ``TBB::tbb``, ``TBB::tbbmalloc``). +Set ``TBB_FIND_RELEASE_ONLY`` to ``TRUE`` before the ``find_package`` call (e.g. ``set(TBB_FIND_RELEASE_ONLY TRUE)``) to search only for the release version of TBB. This variable helps avoid linking the release and debug versions of TBB simultaneously when the CMake configuration is `Debug` but a third-party component depends on the release version of TBB.
Variables set during TBB configuration: ========================= ================================================ diff --git a/cmake/templates/TBBConfig.cmake.in b/cmake/templates/TBBConfig.cmake.in index d5f2eabc8b..bd75d938c3 100644 --- a/cmake/templates/TBBConfig.cmake.in +++ b/cmake/templates/TBBConfig.cmake.in @@ -47,14 +47,20 @@ unset(_tbbmalloc_proxy_ix) foreach (_tbb_component ${TBB_FIND_COMPONENTS}) set(TBB_${_tbb_component}_FOUND 0) - set(_tbb_release_lib "${CMAKE_CURRENT_LIST_DIR}/@TBB_LIB_REL_PATH@/@TBB_LIB_PREFIX@${_tbb_component}.@TBB_LIB_EXT@") - set(_tbb_debug_lib "${CMAKE_CURRENT_LIST_DIR}/@TBB_LIB_REL_PATH@/@TBB_LIB_PREFIX@${_tbb_component}_debug.@TBB_LIB_EXT@") + get_filename_component(_tbb_release_lib "${CMAKE_CURRENT_LIST_DIR}/@TBB_LIB_REL_PATH@/@TBB_LIB_PREFIX@${_tbb_component}.@TBB_LIB_EXT@" ABSOLUTE) + + if (NOT TBB_FIND_RELEASE_ONLY) + get_filename_component(_tbb_debug_lib "${CMAKE_CURRENT_LIST_DIR}/@TBB_LIB_REL_PATH@/@TBB_LIB_PREFIX@${_tbb_component}_debug.@TBB_LIB_EXT@" ABSOLUTE) + endif() if (EXISTS "${_tbb_release_lib}" OR EXISTS "${_tbb_debug_lib}") if (NOT TARGET TBB::${_tbb_component}) add_library(TBB::${_tbb_component} SHARED IMPORTED) + + get_filename_component(_tbb_include_dir "${CMAKE_CURRENT_LIST_DIR}/@TBB_INC_REL_PATH@" ABSOLUTE) set_target_properties(TBB::${_tbb_component} PROPERTIES - INTERFACE_INCLUDE_DIRECTORIES "${CMAKE_CURRENT_LIST_DIR}/@TBB_INC_REL_PATH@") + INTERFACE_INCLUDE_DIRECTORIES "${_tbb_include_dir}") + unset(_tbb_include_dir) if (EXISTS "${_tbb_release_lib}") set_target_properties(TBB::${_tbb_component} PROPERTIES @@ -72,14 +78,16 @@ foreach (_tbb_component ${TBB_FIND_COMPONENTS}) if (_tbb_component STREQUAL tbbmalloc_proxy) set_target_properties(TBB::tbbmalloc_proxy PROPERTIES INTERFACE_LINK_LIBRARIES TBB::tbbmalloc) endif() - else() - message(STATUS "Using previously found TBB::${_tbb_component}") endif() list(APPEND TBB_IMPORTED_TARGETS TBB::${_tbb_component}) set(TBB_${_tbb_component}_FOUND 1) elseif (TBB_FIND_REQUIRED AND TBB_FIND_REQUIRED_${_tbb_component}) message(STATUS "Missed required Intel TBB component: ${_tbb_component}") - message(STATUS " one or both of:\n ${_tbb_release_lib}\n ${_tbb_debug_lib}\n files must exist.") + if (TBB_FIND_RELEASE_ONLY) + message(STATUS " ${_tbb_release_lib} must exist.") + else() + message(STATUS " one or both of:\n ${_tbb_release_lib}\n ${_tbb_debug_lib}\n files must exist.") + endif() set(TBB_FOUND FALSE) endif() endforeach() diff --git a/cmake/templates/TBBConfigInternal.cmake.in b/cmake/templates/TBBConfigInternal.cmake.in index c3d7ee4caa..ffd096596d 100644 --- a/cmake/templates/TBBConfigInternal.cmake.in +++ b/cmake/templates/TBBConfigInternal.cmake.in @@ -49,7 +49,10 @@ foreach (_tbb_component ${TBB_FIND_COMPONENTS}) set(TBB_${_tbb_component}_FOUND 0) set(_tbb_release_lib "@TBB_RELEASE_LIB_PATH@/@TBB_LIB_PREFIX@${_tbb_component}.@TBB_LIB_EXT@") - set(_tbb_debug_lib "@TBB_DEBUG_LIB_PATH@/@TBB_LIB_PREFIX@${_tbb_component}_debug.@TBB_LIB_EXT@") + + if (NOT TBB_FIND_RELEASE_ONLY) + set(_tbb_debug_lib "@TBB_DEBUG_LIB_PATH@/@TBB_LIB_PREFIX@${_tbb_component}_debug.@TBB_LIB_EXT@") + endif() if (EXISTS "${_tbb_release_lib}" OR EXISTS "${_tbb_debug_lib}") if (NOT TARGET TBB::${_tbb_component}) @@ -73,14 +76,16 @@ foreach (_tbb_component ${TBB_FIND_COMPONENTS}) if (_tbb_component STREQUAL tbbmalloc_proxy) set_target_properties(TBB::tbbmalloc_proxy PROPERTIES INTERFACE_LINK_LIBRARIES TBB::tbbmalloc) endif() - else() - message(STATUS "Using previously found TBB::${_tbb_component}") endif() 
list(APPEND TBB_IMPORTED_TARGETS TBB::${_tbb_component}) set(TBB_${_tbb_component}_FOUND 1) elseif (TBB_FIND_REQUIRED AND TBB_FIND_REQUIRED_${_tbb_component}) message(STATUS "Missed required Intel TBB component: ${_tbb_component}") - message(STATUS " one or both of:\n ${_tbb_release_lib}\n ${_tbb_debug_lib}\n files must exist.") + if (TBB_FIND_RELEASE_ONLY) + message(STATUS " ${_tbb_release_lib} must exist.") + else() + message(STATUS " one or both of:\n ${_tbb_release_lib}\n ${_tbb_debug_lib}\n files must exist.") + endif() set(TBB_FOUND FALSE) endif() endforeach() diff --git a/doc/Release_Notes.txt b/doc/Release_Notes.txt index 0ed0c2e249..028ab0f5b9 100644 --- a/doc/Release_Notes.txt +++ b/doc/Release_Notes.txt @@ -1,16 +1,23 @@ ------------------------------------------------------------------------ Intel(R) Threading Building Blocks - Release Notes - Version 2019 + Version 2020 ------------------------------------------------------------------------ +Intel(R) Threading Building Blocks (Intel(R) TBB) is planned to be +renewed and updated with the latest C++ standards to increase usability. +Removal of some Intel(R) TBB features will be evaluated in future +releases. The features under consideration are mapped to updated options +as described in 'TBB Revamp: Background, Changes, and Modernization' +(see https://software.intel.com/en-us/articles/tbb-revamp). System Requirements ------------------- -Intel(R) Threading Building Blocks (Intel(R) TBB) is available -commercially (see http://software.intel.com/en-us/intel-tbb) as a -binary distribution, and in open source, in both source and binary -forms (see http://threadingbuildingblocks.org). +Intel(R) Threading Building Blocks is available commercially +(see http://software.intel.com/en-us/intel-tbb) as a binary distribution, +and in open source, in both source and binary forms +(see https://github.com/intel/tbb).
When built from source, Intel(R) TBB is intended to be highly portable and so supports a wide variety of operating systems and platforms (see @@ -25,11 +32,9 @@ Hardware - Recommended Microsoft* Windows* Systems Intel(R) Core(TM) processor family Intel(R) Xeon(R) processor family - Intel(R) Xeon Phi(TM) processor family Linux* Systems Intel(R) Core(TM) processor family Intel(R) Xeon(R) processor family - Intel(R) Xeon Phi(TM) processor family macOS* Systems Intel(R) Core(TM) processor family Android* Systems @@ -37,8 +42,8 @@ Hardware - Recommended Hardware - Supported - Intel(R) Pentium(R) 4 processor family - Intel(R) Xeon Phi(TM) coprocessor + Intel(R) Pentium(R) processor family + Intel(R) Xeon Phi(TM) processor family Intel(R) Atom(TM) processor family Non Intel(R) processors compatible with the above processors @@ -49,40 +54,34 @@ Software - Minimum Requirements Software - Recommended - Intel(R) Parallel Studio XE 2018, 2019 - Intel(R) System Studio 2018, 2019 + Intel(R) Parallel Studio XE 2019, 2020 + Intel(R) System Studio 2019, 2020 Software - Supported Operating Systems Systems with Microsoft* Windows* operating systems Microsoft* Windows* 10 - Microsoft* Windows* 8.1 - Microsoft* Windows* 7 SP1 Microsoft* Windows* Server 2016 - Microsoft* Windows* Server 2012 R2 + Microsoft* Windows* Server 2019 Systems with Linux* operating systems - CentOS 7.1 - Debian* 8, 9 - Fedora* 27 + CentOS* 7.1 + Debian* 9, 10 + Fedora* 30 Intel(R) Cluster Ready - Red Hat* Enterprise Linux* 6, 7 - SuSE* Linux* Enterprise Server 12 - Ubuntu* 14.04 LTS, 16.04 LTS, 18.04 LTS - WindRiver* Linux 8, 9 - Yocto 2.3 + Red Hat* Enterprise Linux* 7, 8 + SuSE* Linux* Enterprise Server 12,15 + Ubuntu* 16.04 LTS, 18.04 LTS + Ubuntu* 19.04 + WindRiver* Linux 9 + Yocto* 2.3 Systems with OS X* or macOS* operating systems - OS X* 10.11 - macOS* 10.12, 10.13 + macOS* 10.14, 10.15 Systems with Android* operating systems - Android* 5.x, 6.x, 7.x, 8.x + Android* 6.x, 7.x, 8.x, 9.x Software - Supported Compilers - Intel(R) C++ Compiler 17, 18 and 19 version - Microsoft* Visual C++ 12.0 (Microsoft* Visual Studio* 2013, - Windows* OS only) - Microsoft* Visual C++ 14.0 (Microsoft* Visual Studio* 2015, - Windows* OS only) + Intel(R) C++ Compiler 18, 19 and 19.1 version Microsoft* Visual C++ 14.1 (Microsoft* Visual Studio* 2017, Windows* OS only) Microsoft* Visual C++ 14.2 (Microsoft* Visual Studio* 2019, @@ -91,17 +90,17 @@ Software - Supported Compilers Microsoft* Windows* Software Development Kit for Windows* 10 For each supported Linux* operating system, the standard gcc version provided with that operating system is supported - GNU Compilers (gcc) 4.1 - 7.1 - GNU C Library (glibc) version 2.4 - 2.19 - Clang* 3.8 - 7.0 - Xcode* 7.0 - 9.1 + GNU Compilers (gcc) 4.8 - 9.1 + GNU C Library (glibc) version 2.17 - 2.29 + Clang* 3.8 - 8.0 + Xcode* 8.x - 11.x Android* NDK r10e - r17b Software - Supported Performance Analysis Tools - Intel(R) VTune(TM) Amplifier XE 2018, 2019 - Intel(R) Inspector XE 2018, 2019 - Intel(R) Advisor XE 2018, 2019 + Intel(R) VTune(TM) Amplifier XE 2019, 2020 + Intel(R) Inspector XE 2019, 2020 + Intel(R) Advisor XE 2019, 2020 Known Issues ------------ @@ -113,16 +112,21 @@ Reference for notes applicable to multiple releases of Intel(R) TBB. Library Issues - - If you build Intel(R) TBB from sources with GCC 6, specify - the -flifetime-dse=1 option to prevent crashes at runtime, - or use Intel(R) TBB makefiles that automatically set this option. 
+ - If you build Intel(R) TBB from sources with GCC 6 or higher, + specify the -flifetime-dse=1 option to prevent crashes at + runtime, or use Intel(R) TBB makefiles that automatically set + this option. + - The copy constructor of task_arena does not propagate arena + constraints to the new task_arena object. For a workaround, + explicitly call task_arena::initialize() with the desired + constraints for the constructed object. ------------------------------------------------------------------------ (C) 2019 Intel Corporation -Intel, the Intel logo, Intel Core, Intel Atom, Xeon, Intel Xeon Phi and -Pentium are trademarks of Intel Corporation in the U.S. and/or other -countries. +Intel, the Intel logo, Intel Core, Intel Atom, Celeron, Xeon, +Intel Xeon Phi and Pentium are trademarks of Intel Corporation +in the U.S. and/or other countries * Other names and brands may be claimed as the property of others. diff --git a/include/tbb/concurrent_hash_map.h b/include/tbb/concurrent_hash_map.h index 6f695f672b..e8a617ebda 100644 --- a/include/tbb/concurrent_hash_map.h +++ b/include/tbb/concurrent_hash_map.h @@ -676,7 +676,9 @@ class concurrent_hash_map : protected internal::hash_map_base { return create_node(allocator, std::piecewise_construct, std::forward_as_tuple(key), std::forward_as_tuple()); #else - T obj; // Use of temporary object in impossible, because create_node takes non-const reference + // Use of a temporary object is impossible, because create_node takes a non-const reference. + // copy-initialization is possible because T is already required to be CopyConstructible. + T obj = T(); return create_node(allocator, key, tbb::internal::move(obj)); #endif } diff --git a/include/tbb/flow_graph.h b/include/tbb/flow_graph.h index 5526f95adf..746589c1f0 100644 --- a/include/tbb/flow_graph.h +++ b/include/tbb/flow_graph.h @@ -779,6 +779,8 @@ inline graph::graph(task_group_context& use_this_context) : my_context(&use_this_context), my_nodes(NULL), my_nodes_last(NULL), my_task_arena(NULL) { prepare_task_arena(); own_context = false; + cancelled = false; + caught_exception = false; my_root_task = (new (task::allocate_root(*my_context)) empty_task); my_root_task->set_ref_count(1); tbb::internal::fgt_graph(this); diff --git a/include/tbb/info.h b/include/tbb/info.h new file mode 100644 index 0000000000..0accc16577 --- /dev/null +++ b/include/tbb/info.h @@ -0,0 +1,52 @@ +/* + Copyright (c) 2019 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_info_H +#define __TBB_info_H + +#include "tbb_config.h" + +#if __TBB_NUMA_SUPPORT + +#include <vector> + +namespace tbb { + namespace internal { + namespace numa_topology { + unsigned nodes_count(); + void fill(int* indexes_array); + int default_concurrency(int node_id); + } //namespace numa_topology + } // namespace internal + + typedef int numa_node_id; + + namespace info { + inline std::vector<numa_node_id> numa_nodes() { + std::vector<numa_node_id> nodes_indexes(tbb::internal::numa_topology::nodes_count()); + internal::numa_topology::fill(&nodes_indexes.front()); + return nodes_indexes; + } + + inline int default_concurrency(numa_node_id id = -1) { + return internal::numa_topology::default_concurrency(id); + } + } // namespace info +} // namespace tbb + +#endif /*__TBB_NUMA_SUPPORT*/ + +#endif /*__TBB_info_H*/ diff --git a/include/tbb/internal/_aggregator_impl.h b/include/tbb/internal/_aggregator_impl.h index cece90f316..684e004c90 100644 --- a/include/tbb/internal/_aggregator_impl.h +++ b/include/tbb/internal/_aggregator_impl.h @@ -160,7 +160,7 @@ template <typename aggregating_class, typename operation_list> class aggregating_functor { aggregating_class *fi; public: - aggregating_functor() {} + aggregating_functor() : fi() {} aggregating_functor(aggregating_class *fi_) : fi(fi_) {} void operator()(operation_list* op_list) { fi->handle_operations(op_list); } }; diff --git a/include/tbb/task_arena.h b/include/tbb/task_arena.h index 0127c45447..83ba9118e4 100644 --- a/include/tbb/task_arena.h +++ b/include/tbb/task_arena.h @@ -23,6 +23,9 @@ #include "task.h" #include "tbb_exception.h" #include "internal/_template_helpers.h" +#if __TBB_NUMA_SUPPORT +#include "info.h" +#endif /*__TBB_NUMA_SUPPORT*/ #if TBB_USE_THREADING_TOOLS #include "atomic.h" // for as_atomic #endif @@ -100,6 +103,18 @@ class delegated_function : public delegate_base { }; class task_arena_base { +#if __TBB_NUMA_SUPPORT +public: + // TODO: consider a versioning approach to resolve potential backward compatibility issues. + struct constraints { + constraints(numa_node_id id = automatic, int maximal_concurrency = automatic) + : numa_id(id) + , max_concurrency(maximal_concurrency) + {} + numa_node_id numa_id; + int max_concurrency; + }; +#endif /*__TBB_NUMA_SUPPORT*/ protected: //! NULL if not currently initialized. internal::arena* my_arena; @@ -118,11 +133,30 @@ class task_arena_base { //! Special settings intptr_t my_version_and_traits; + bool my_initialized; + +#if __TBB_NUMA_SUPPORT + //! The NUMA node index to which the arena will be attached + numa_node_id my_numa_id; + + // Do not access my_numa_id without the following runtime check. + // Although my_numa_id is accessible here, it does not exist in task_arena_base on the user + // side if the TBB_PREVIEW_NUMA_SUPPORT macro is not defined by the user. To be sure that + // my_numa_id exists in the task_arena_base layout, we check the traits. + // TODO: Consider increasing interface version for task_arena_base instead of this runtime check. + numa_node_id numa_id() { + return (my_version_and_traits & numa_support_flag) == numa_support_flag ?
my_numa_id : automatic; + } +#endif + enum { default_flags = 0 #if __TBB_TASK_GROUP_CONTEXT | (task_group_context::default_traits & task_group_context::exact_exception) // 0 or 1 << 16 , exact_exception_flag = task_group_context::exact_exception // used to specify flag for context directly +#endif +#if __TBB_NUMA_SUPPORT + , numa_support_flag = 1 #endif }; @@ -133,9 +167,31 @@ class task_arena_base { #endif , my_max_concurrency(max_concurrency) , my_master_slots(reserved_for_masters) +#if __TBB_NUMA_SUPPORT + , my_version_and_traits(default_flags | numa_support_flag) +#else , my_version_and_traits(default_flags) +#endif + , my_initialized(false) +#if __TBB_NUMA_SUPPORT + , my_numa_id(automatic) +#endif {} +#if __TBB_NUMA_SUPPORT + task_arena_base(const constraints& constraints_, unsigned reserved_for_masters) + : my_arena(0) +#if __TBB_TASK_GROUP_CONTEXT + , my_context(0) +#endif + , my_max_concurrency(constraints_.max_concurrency) + , my_master_slots(reserved_for_masters) + , my_version_and_traits(default_flags | numa_support_flag) + , my_initialized(false) + , my_numa_id(constraints_.numa_id ) + {} +#endif /*__TBB_NUMA_SUPPORT*/ + void __TBB_EXPORTED_METHOD internal_initialize(); void __TBB_EXPORTED_METHOD internal_terminate(); void __TBB_EXPORTED_METHOD internal_attach(); @@ -177,7 +233,6 @@ class task_arena : public internal::task_arena_base { #endif ); friend int tbb::this_task_arena::max_concurrency(); - bool my_initialized; void mark_initialized() { __TBB_ASSERT( my_arena, "task_arena initialization is incomplete" ); #if __TBB_TASK_GROUP_CONTEXT @@ -226,13 +281,18 @@ class task_arena : public internal::task_arena_base { **/ task_arena(int max_concurrency_ = automatic, unsigned reserved_for_masters = 1) : task_arena_base(max_concurrency_, reserved_for_masters) - , my_initialized(false) {} +#if __TBB_NUMA_SUPPORT + //! Creates task arena pinned to certain NUMA node + task_arena(const constraints& constraints_, unsigned reserved_for_masters = 1) + : task_arena_base(constraints_, reserved_for_masters) + {} +#endif /*__TBB_NUMA_SUPPORT*/ + //! Copies settings from another task_arena task_arena(const task_arena &s) // copy settings but not the reference or instance : task_arena_base(s.my_max_concurrency, s.my_master_slots) - , my_initialized(false) {} //! Tag class used to indicate the "attaching" constructor @@ -241,7 +301,6 @@ class task_arena : public internal::task_arena_base { //! Creates an instance of task_arena attached to the current arena of the thread explicit task_arena( attach ) : task_arena_base(automatic, 1) // use default settings if attach fails - , my_initialized(false) { internal_attach(); if( my_arena ) my_initialized = true; @@ -266,6 +325,19 @@ class task_arena : public internal::task_arena_base { } } +#if __TBB_NUMA_SUPPORT + inline void initialize(constraints constraints_, unsigned reserved_for_masters = 1) { + // TODO: decide if this call must be thread-safe + __TBB_ASSERT(!my_arena, "Impossible to modify settings of an already initialized task_arena"); + if( !my_initialized ) { + my_numa_id = constraints_.numa_id; + my_max_concurrency = constraints_.max_concurrency; + my_master_slots = reserved_for_masters; + initialize(); + } + } +#endif /*__TBB_NUMA_SUPPORT*/ + //! 
Attaches this instance to the current arena of the thread inline void initialize(attach) { // TODO: decide if this call must be thread-safe diff --git a/include/tbb/tbb_config.h b/include/tbb/tbb_config.h index c2f7e6e32e..7b1f02fdd6 100644 --- a/include/tbb/tbb_config.h +++ b/include/tbb/tbb_config.h @@ -214,9 +214,7 @@ /** TODO: extend exception_ptr related conditions to cover libstdc++ **/ #define __TBB_EXCEPTION_PTR_PRESENT (__cplusplus >= 201103L && (_LIBCPP_VERSION || __TBB_GLIBCXX_VERSION >= 40600)) #define __TBB_STATIC_ASSERT_PRESENT __has_feature(__cxx_static_assert__) - /**Clang (preprocessor) has problems with dealing with expression having __has_include in #ifs - * used inside C++ code. (At least version that comes with OS X 10.8 : Apple LLVM version 4.2 (clang-425.0.28) (based on LLVM 3.2svn)) **/ - #if (__GXX_EXPERIMENTAL_CXX0X__ && __has_include()) + #if (__cplusplus >= 201103L && __has_include()) #define __TBB_CPP11_TUPLE_PRESENT 1 #endif #if (__has_feature(__cxx_generalized_initializers__) && __has_include()) @@ -577,6 +575,10 @@ There are four cases that are supported: #error __TBB_TASK_PRIORITY requires __TBB_TASK_GROUP_CONTEXT to be enabled #endif +#if TBB_PREVIEW_NUMA_SUPPORT || __TBB_BUILD + #define __TBB_NUMA_SUPPORT 1 +#endif + #if TBB_PREVIEW_WAITING_FOR_WORKERS || __TBB_BUILD #define __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE 1 #endif @@ -617,10 +619,6 @@ There are four cases that are supported: // Intel C++ Compiler starts analyzing usages of the deprecated content at the template // instantiation site, which is too late for suppression of the corresponding messages for internal // stuff. -#ifndef TBB_SUPPRESS_DEPRECATED_MESSAGES -#define TBB_SUPPRESS_DEPRECATED_MESSAGES 1 -#endif - #if !defined(__INTEL_COMPILER) && (!defined(TBB_SUPPRESS_DEPRECATED_MESSAGES) || (TBB_SUPPRESS_DEPRECATED_MESSAGES == 0)) #if (__cplusplus >= 201402L) #define __TBB_DEPRECATED [[deprecated]] diff --git a/include/tbb/tbb_exception.h b/include/tbb/tbb_exception.h index ce6e092e47..d9045c474c 100644 --- a/include/tbb/tbb_exception.h +++ b/include/tbb/tbb_exception.h @@ -228,7 +228,7 @@ class __TBB_DEPRECATED captured_exception : public tbb_exception private: //! Used only by method move(). - captured_exception() {} + captured_exception() : my_dynamic(), my_exception_name(), my_exception_info() {} //! 
Functionally equivalent to {captured_exception e(name,info); return e.move();} static captured_exception* allocate( const char* name, const char* info ); diff --git a/include/tbb/tbb_stddef.h b/include/tbb/tbb_stddef.h index 86cb42f615..749ea1f45c 100644 --- a/include/tbb/tbb_stddef.h +++ b/include/tbb/tbb_stddef.h @@ -18,11 +18,11 @@ #define __TBB_tbb_stddef_H // Marketing-driven product version -#define TBB_VERSION_MAJOR 2019 -#define TBB_VERSION_MINOR 9 +#define TBB_VERSION_MAJOR 2020 +#define TBB_VERSION_MINOR 0 // Engineering-focused interface version -#define TBB_INTERFACE_VERSION 11009 +#define TBB_INTERFACE_VERSION 11100 #define TBB_INTERFACE_VERSION_MAJOR TBB_INTERFACE_VERSION/1000 // The oldest major interface version still supported diff --git a/src/Makefile b/src/Makefile index cd82fe3fec..0e61b131c6 100644 --- a/src/Makefile +++ b/src/Makefile @@ -20,17 +20,19 @@ include $(tbb_root)/build/common.inc #According to documentation, recursively invoked make commands can process their targets in parallel .NOTPARALLEL: -.PHONY: all tbb tbbmalloc tbbproxy test test_no_depends release debug examples clean +.PHONY: all tbb tbbmalloc tbbbind tbbproxy test test_no_depends release debug examples clean mkdir -all: release debug examples +all: release examples tbb: tbb_release tbb_debug tbbmalloc: tbbmalloc_release tbbmalloc_debug -tbbproxy: tbbproxy_release tbbproxy_debug +tbbbind: tbbbind_release -rml: rml_release rml_debug +tbbproxy: tbbproxy_release + +rml: rml_release test: tbbmalloc_test_release $(if $(use_proxy),tbbproxy_test_release) tbb_test_release tbbmalloc_test_debug $(if $(use_proxy),tbbproxy_test_debug) tbb_test_debug ifeq (,$(findstring skip,$(target:android=skip) $(offload:mic=skip))) @@ -53,6 +55,8 @@ examples_no_depends: examples_release_no_depends examples_debug_no_depends clean: clean_release clean_debug clean_examples @echo clean done +mkdir: mkdir_debug mkdir_release + .PHONY: full full: $(MAKE) -sir --no-print-directory -f Makefile tbb_root=.. 
clean all @@ -95,6 +99,7 @@ python_%: mkdir_release .PHONY: test_release test_debug test_release_no_depends test_debug_no_depends .PHONY: tbb_release tbb_debug tbb_test_release tbb_test_debug tbb_test_release_no_depends tbb_test_debug_no_depends +.PHONY: tbbbind_release tbbbind_debug # do not delete double-space after -C option tbb_release: mkdir_release $(MAKE) -C "$(work_dir)_release" -r -f $(tbb_root)/build/Makefile.tbb cfg=release @@ -117,6 +122,12 @@ test_debug: tbb_test_debug test_release_no_depends: tbb_test_release_no_depends test_debug_no_depends: tbb_test_debug_no_depends +tbbbind_release: mkdir_release + $(MAKE) -C "$(work_dir)_release" -r -f $(tbb_root)/build/Makefile.tbbbind cfg=release tbbbind + +tbbbind_debug: mkdir_debug + $(MAKE) -C "$(work_dir)_debug" -r -f $(tbb_root)/build/Makefile.tbbbind cfg=debug tbbbind + .PHONY: tbbmalloc_release tbbmalloc_debug .PHONY: tbbmalloc_dll_release tbbmalloc_dll_debug tbbmalloc_proxy_dll_release tbbmalloc_proxy_dll_debug .PHONY: tbbmalloc_test tbbmalloc_test_release tbbmalloc_test_debug tbbmalloc_test_release_no_depends tbbmalloc_test_debug_no_depends diff --git a/src/perf/time_resumable_tasks.cpp b/src/perf/time_resumable_tasks.cpp index f3a6933ecf..0de2660363 100644 --- a/src/perf/time_resumable_tasks.cpp +++ b/src/perf/time_resumable_tasks.cpp @@ -310,7 +310,7 @@ int main() { // No any work + just resumable tasks feature (true) ScalabilityBenchmark(0); - // Native implmentation + // Native implementation // Big work size BenchNativeImpl(100000); // Small work size diff --git a/src/tbb/arena.cpp b/src/tbb/arena.cpp index e192256634..ab905d85fa 100644 --- a/src/tbb/arena.cpp +++ b/src/tbb/arena.cpp @@ -812,7 +812,11 @@ namespace internal { void task_arena_base::internal_initialize( ) { governor::one_time_init(); if( my_max_concurrency < 1 ) +#if __TBB_NUMA_SUPPORT + my_max_concurrency = tbb::internal::numa_topology::default_concurrency(numa_id()); +#else /*__TBB_NUMA_SUPPORT*/ my_max_concurrency = (int)governor::default_num_threads(); +#endif /*__TBB_NUMA_SUPPORT*/ __TBB_ASSERT( my_master_slots <= (unsigned)my_max_concurrency, "Number of slots reserved for master should not exceed arena concurrency"); arena* new_arena = market::create_arena( my_max_concurrency, my_master_slots, 0 ); // add an internal market reference; a public reference was added in create_arena @@ -833,17 +837,32 @@ void task_arena_base::internal_initialize( ) { new_arena->on_thread_leaving(); // destroy unneeded arena #if __TBB_TASK_GROUP_CONTEXT spin_wait_while_eq(my_context, (task_group_context*)NULL); +#endif /*__TBB_TASK_GROUP_CONTEXT*/ +#if __TBB_TASK_GROUP_CONTEXT || __TBB_NUMA_SUPPORT } else { +#if __TBB_TASK_GROUP_CONTEXT new_arena->my_default_ctx->my_version_and_traits |= my_version_and_traits & exact_exception_flag; as_atomic(my_context) = new_arena->my_default_ctx; -#endif +#endif /*__TBB_TASK_GROUP_CONTEXT*/ +#if __TBB_NUMA_SUPPORT + my_arena->my_numa_binding_observer = tbb::internal::construct_binding_observer( + static_cast(this), numa_id(), my_arena->my_num_slots); +#endif /*__TBB_NUMA_SUPPORT*/ } +#endif /*__TBB_TASK_GROUP_CONTEXT || __TBB_NUMA_SUPPORT*/ + // TODO: should it trigger automatic initialization of this thread? 
governor::local_scheduler_weak(); } void task_arena_base::internal_terminate( ) { if( my_arena ) {// task_arena was initialized +#if __TBB_NUMA_SUPPORT + if( my_arena->my_numa_binding_observer != NULL ) { + tbb::internal::destroy_binding_observer(my_arena->my_numa_binding_observer); + my_arena->my_numa_binding_observer = NULL; + } +#endif /*__TBB_NUMA_SUPPORT*/ my_arena->my_market->release( /*is_public=*/true, /*blocking_terminate=*/false ); my_arena->on_thread_leaving(); my_arena = 0; diff --git a/src/tbb/arena.h b/src/tbb/arena.h index 44f22f7d94..4718575e7c 100644 --- a/src/tbb/arena.h +++ b/src/tbb/arena.h @@ -195,6 +195,11 @@ struct arena_base : padded { observer_list my_observers; #endif +#if __TBB_NUMA_SUPPORT + //! Pointer to internal observer that allows to bind threads in arena to certain NUMA node. + task_scheduler_observer* my_numa_binding_observer; +#endif /*__TBB_NUMA_SUPPORT*/ + #if __TBB_TASK_PRIORITY //! The lowest normalized priority of available spawned or enqueued tasks. intptr_t my_bottom_priority; diff --git a/src/tbb/co_context.h b/src/tbb/co_context.h index 50c2a2c95f..fe44c37461 100644 --- a/src/tbb/co_context.h +++ b/src/tbb/co_context.h @@ -160,7 +160,7 @@ inline void create_coroutine(coroutine_type& c, size_t stack_size, void* arg) { uintptr_t stack_ptr = (uintptr_t)mmap(NULL, protected_stack_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); __TBB_ASSERT((void*)stack_ptr != MAP_FAILED, NULL); - // Alow read write on our stack (guarded pages are still protected) + // Allow read write on our stack (guarded pages are still protected) int err = mprotect((void*)(stack_ptr + REG_PAGE_SIZE), page_aligned_stack_size, PROT_READ | PROT_WRITE); __TBB_ASSERT_EX(!err, NULL); diff --git a/src/tbb/governor.cpp b/src/tbb/governor.cpp index 029d0831c7..5ccee9177c 100644 --- a/src/tbb/governor.cpp +++ b/src/tbb/governor.cpp @@ -293,6 +293,154 @@ __cilk_tbb_retcode governor::stack_op_handler( __cilk_tbb_stack_op op, void* dat } #endif /* __TBB_SURVIVE_THREAD_SWITCH */ +#if __TBB_NUMA_SUPPORT + +#if __TBB_WEAK_SYMBOLS_PRESENT +#pragma weak initialize_numa_topology +#pragma weak subscribe_arena +#pragma weak unsubscribe_arena + +extern "C" { +void initialize_numa_topology( + size_t groups_num, int& nodes_count, int*& indexes_list, int*& concurrency_list ); +tbb::interface6::task_scheduler_observer* subscribe_arena( + tbb::interface7::task_arena* ta, int numa_id, int num_slots ); +void unsubscribe_arena( tbb::interface6::task_scheduler_observer* numa_binding_observer ); +} +#endif /* __TBB_WEAK_SYMBOLS_PRESENT */ + +// Handlers for communication with TBBbind +#if _WIN32 || _WIN64 || __linux__ +static void (*initialize_numa_topology_handler)( + size_t groups_num, int& nodes_count, int*& indexes_list, int*& concurrency_list ) = NULL; +#endif /* _WIN32 || _WIN64 || __linux__ */ + +static tbb::interface6::task_scheduler_observer* (*subscribe_arena_handler)( + tbb::interface7::task_arena* ta, int numa_id, int num_slots ) = NULL; + +static void (*unsubscribe_arena_handler)( + tbb::interface6::task_scheduler_observer* numa_binding_observer ) = NULL; + +#if _WIN32 || _WIN64 || __linux__ +// Table describing how to link the handlers. 
+static const dynamic_link_descriptor TbbBindLinkTable[] = { + DLD(initialize_numa_topology, initialize_numa_topology_handler), + DLD(subscribe_arena, subscribe_arena_handler), + DLD(unsubscribe_arena, unsubscribe_arena_handler) +}; + +#if TBB_USE_DEBUG +#define DEBUG_SUFFIX "_debug" +#else +#define DEBUG_SUFFIX +#endif /* TBB_USE_DEBUG */ + +#if _WIN32 || _WIN64 +#define TBBBIND_NAME "tbbbind" DEBUG_SUFFIX ".dll" +#elif __linux__ +#define TBBBIND_NAME "libtbbbind" DEBUG_SUFFIX __TBB_STRING(.so.TBB_COMPATIBLE_INTERFACE_VERSION) +#endif /* __linux__ */ +#endif /* _WIN32 || _WIN64 || __linux__ */ + +// Stubs that will be used if the TBBbind library is unavailable. +static tbb::interface6::task_scheduler_observer* dummy_subscribe_arena ( + tbb::interface7::task_arena*, int, int ) { return NULL; } +static void dummy_unsubscribe_arena( tbb::interface6::task_scheduler_observer* ) {} + +// Representation of NUMA topology information on the TBB side. +// The NUMA topology may be initialized by a third-party component (e.g. hwloc) +// or just filled with default stubs (one NUMA node with index 0 and +// the default_num_threads value as its default_concurrency). +namespace numa_topology { +namespace { +int numa_nodes_count = 0; +int* numa_indexes = NULL; +int* default_concurrency_list = NULL; +static tbb::atomic<do_once_state> numa_topology_init_state; +} // internal namespace + +// Tries to load the TBBbind library API; on success, gets NUMA topology information from it, +// otherwise fills the NUMA topology with stubs. +// TODO: Add TBBbind loading status if TBB_VERSION is set. +void initialization_impl() { + governor::one_time_init(); + +#if _WIN32 || _WIN64 || __linux__ + bool load_tbbbind = true; +#if _WIN32 && !_WIN64 + // For 32-bit Windows applications, process affinity masks can only support up to 32 logical CPUs.
+ SYSTEM_INFO si; + GetNativeSystemInfo(&si); + load_tbbbind = si.dwNumberOfProcessors <= 32; +#endif /* _WIN32 && !_WIN64 */ + + if (load_tbbbind && dynamic_link(TBBBIND_NAME, TbbBindLinkTable, 3)) { + int number_of_groups = 1; +#if _WIN32 || _WIN64 + number_of_groups = NumberOfProcessorGroups(); +#endif /* _WIN32 || _WIN64 */ + initialize_numa_topology_handler( + number_of_groups, numa_nodes_count, numa_indexes, default_concurrency_list); + + if (numa_nodes_count==1 && numa_indexes[0] >= 0) { + __TBB_ASSERT(default_concurrency_list[numa_indexes[0]] == (int)governor::default_num_threads(), + "default_concurrency() should be equal to governor::default_num_threads() on single" + "NUMA node systems."); + } + return; + } +#endif /* _WIN32 || _WIN64 || __linux__ */ + + static int dummy_index = -1; + static int dummy_concurrency = governor::default_num_threads(); + + numa_nodes_count = 1; + numa_indexes = &dummy_index; + default_concurrency_list = &dummy_concurrency; + + subscribe_arena_handler = dummy_subscribe_arena; + unsubscribe_arena_handler = dummy_unsubscribe_arena; +} + +void initialize() { + atomic_do_once(initialization_impl, numa_topology_init_state); +} + +unsigned nodes_count() { + initialize(); + return numa_nodes_count; +} + +void fill( int* indexes_array ) { + initialize(); + for ( int i = 0; i < numa_nodes_count; i++ ) { + indexes_array[i] = numa_indexes[i]; + } +} + +int default_concurrency( int node_id ) { + if (node_id >= 0) { + initialize(); + return default_concurrency_list[node_id]; + } + return governor::default_num_threads(); +} + +} // namespace numa_topology + +tbb::interface6::task_scheduler_observer* construct_binding_observer( tbb::interface7::task_arena* ta, + int numa_id, int num_slots ) { + // numa_topology initialization will be lazily performed inside nodes_count() call + return (numa_id >= 0 && numa_topology::nodes_count() > 1) ? 
+ subscribe_arena_handler(ta, numa_id, num_slots) : NULL; +} + +void destroy_binding_observer( tbb::interface6::task_scheduler_observer* observer ) { + __TBB_ASSERT(observer != NULL, "Trying to access observer via NULL pointer"); + unsubscribe_arena_handler(observer); +} +#endif /* __TBB_NUMA_SUPPORT */ + } // namespace internal //------------------------------------------------------------------------ diff --git a/src/tbb/lin32-tbb-export.lst b/src/tbb/lin32-tbb-export.lst index 20413b87af..25d1bb15a8 100644 --- a/src/tbb/lin32-tbb-export.lst +++ b/src/tbb/lin32-tbb-export.lst @@ -68,6 +68,11 @@ __TBB_SYMBOL( _ZNK3tbb10interface78internal15task_arena_base16internal_executeER __TBB_SYMBOL( _ZNK3tbb10interface78internal15task_arena_base13internal_waitEv ) __TBB_SYMBOL( _ZN3tbb10interface78internal15task_arena_base21internal_current_slotEv ) __TBB_SYMBOL( _ZN3tbb10interface78internal15task_arena_base24internal_max_concurrencyEPKNS0_10task_arenaE ) +#if __TBB_NUMA_SUPPORT +__TBB_SYMBOL( _ZN3tbb8internal13numa_topology11nodes_countEv ) +__TBB_SYMBOL( _ZN3tbb8internal13numa_topology4fillEPi ) +__TBB_SYMBOL( _ZN3tbb8internal13numa_topology19default_concurrencyEi ) +#endif /*__TBB_NUMA_SUPPORT*/ #if __TBB_TASK_ISOLATION __TBB_SYMBOL( _ZN3tbb10interface78internal20isolate_within_arenaERNS1_13delegate_baseEi ) #endif /* __TBB_TASK_ISOLATION */ diff --git a/src/tbb/lin32-tbbbind-export.def b/src/tbb/lin32-tbbbind-export.def new file mode 100644 index 0000000000..cba4e4f008 --- /dev/null +++ b/src/tbb/lin32-tbbbind-export.def @@ -0,0 +1,22 @@ +/* + Copyright (c) 2005-2019 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +{ +global: +initialize_numa_topology; +subscribe_arena; +unsubscribe_arena; +}; diff --git a/src/tbb/lin64-tbb-export.lst b/src/tbb/lin64-tbb-export.lst index 1b2f9cd469..080836b792 100644 --- a/src/tbb/lin64-tbb-export.lst +++ b/src/tbb/lin64-tbb-export.lst @@ -68,6 +68,11 @@ __TBB_SYMBOL( _ZNK3tbb10interface78internal15task_arena_base16internal_executeER __TBB_SYMBOL( _ZNK3tbb10interface78internal15task_arena_base13internal_waitEv ) __TBB_SYMBOL( _ZN3tbb10interface78internal15task_arena_base21internal_current_slotEv ) __TBB_SYMBOL( _ZN3tbb10interface78internal15task_arena_base24internal_max_concurrencyEPKNS0_10task_arenaE ) +#if __TBB_NUMA_SUPPORT +__TBB_SYMBOL( _ZN3tbb8internal13numa_topology11nodes_countEv ) +__TBB_SYMBOL( _ZN3tbb8internal13numa_topology4fillEPi ) +__TBB_SYMBOL( _ZN3tbb8internal13numa_topology19default_concurrencyEi ) +#endif /*__TBB_NUMA_SUPPORT*/ #if __TBB_TASK_ISOLATION __TBB_SYMBOL( _ZN3tbb10interface78internal20isolate_within_arenaERNS1_13delegate_baseEl ) #endif diff --git a/src/tbb/lin64-tbbbind-export.def b/src/tbb/lin64-tbbbind-export.def new file mode 100644 index 0000000000..cba4e4f008 --- /dev/null +++ b/src/tbb/lin64-tbbbind-export.def @@ -0,0 +1,22 @@ +/* + Copyright (c) 2005-2019 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +{ +global: +initialize_numa_topology; +subscribe_arena; +unsubscribe_arena; +}; diff --git a/src/tbb/mac32-tbb-export.lst b/src/tbb/mac32-tbb-export.lst index cc1c3e6da2..907ddb930c 100644 --- a/src/tbb/mac32-tbb-export.lst +++ b/src/tbb/mac32-tbb-export.lst @@ -76,6 +76,11 @@ __TBB_SYMBOL( _ZNK3tbb10interface78internal15task_arena_base16internal_executeER __TBB_SYMBOL( _ZNK3tbb10interface78internal15task_arena_base13internal_waitEv ) __TBB_SYMBOL( _ZN3tbb10interface78internal15task_arena_base21internal_current_slotEv ) __TBB_SYMBOL( _ZN3tbb10interface78internal15task_arena_base24internal_max_concurrencyEPKNS0_10task_arenaE ) +#if __TBB_NUMA_SUPPORT +__TBB_SYMBOL( _ZN3tbb8internal13numa_topology11nodes_countEv ) +__TBB_SYMBOL( _ZN3tbb8internal13numa_topology4fillEPi ) +__TBB_SYMBOL( _ZN3tbb8internal13numa_topology19default_concurrencyEi ) +#endif /*__TBB_NUMA_SUPPORT*/ #if __TBB_TASK_ISOLATION __TBB_SYMBOL( _ZN3tbb10interface78internal20isolate_within_arenaERNS1_13delegate_baseEl ) #endif /* __TBB_TASK_ISOLATION */ diff --git a/src/tbb/mac64-tbb-export.lst b/src/tbb/mac64-tbb-export.lst index 1761f31411..35da945410 100644 --- a/src/tbb/mac64-tbb-export.lst +++ b/src/tbb/mac64-tbb-export.lst @@ -76,6 +76,11 @@ __TBB_SYMBOL( _ZNK3tbb10interface78internal15task_arena_base16internal_executeER __TBB_SYMBOL( _ZNK3tbb10interface78internal15task_arena_base13internal_waitEv ) __TBB_SYMBOL( _ZN3tbb10interface78internal15task_arena_base21internal_current_slotEv ) __TBB_SYMBOL( _ZN3tbb10interface78internal15task_arena_base24internal_max_concurrencyEPKNS0_10task_arenaE ) +#if __TBB_NUMA_SUPPORT +__TBB_SYMBOL( _ZN3tbb8internal13numa_topology11nodes_countEv ) +__TBB_SYMBOL( _ZN3tbb8internal13numa_topology4fillEPi ) +__TBB_SYMBOL( _ZN3tbb8internal13numa_topology19default_concurrencyEi ) +#endif /*__TBB_NUMA_SUPPORT*/ #if __TBB_TASK_ISOLATION __TBB_SYMBOL( _ZN3tbb10interface78internal20isolate_within_arenaERNS1_13delegate_baseEl ) #endif /* __TBB_TASK_ISOLATION */ diff --git a/src/tbb/observer_proxy.cpp b/src/tbb/observer_proxy.cpp index a214155e72..3e8e16c1ad 100644 --- a/src/tbb/observer_proxy.cpp +++ b/src/tbb/observer_proxy.cpp @@ -304,7 +304,8 @@ void task_scheduler_observer_v3::observe( bool enable ) { intptr_t tag = my_proxy->get_v6_observer()->my_context_tag; if( tag != interface6::task_scheduler_observer::implicit_tag ) { // explicit arena task_arena *a = reinterpret_cast(tag); - a->initialize(); + if ( a->my_arena==NULL ) // Avoid recursion during arena initialization + a->initialize(); my_proxy->my_list = &a->my_arena->my_observers; } else { if( !(s && s->my_arena) ) diff --git a/src/tbb/scheduler.h b/src/tbb/scheduler.h index feb1948602..ce23e4f0c9 100644 --- a/src/tbb/scheduler.h +++ b/src/tbb/scheduler.h @@ -846,6 +846,12 @@ inline void generic_scheduler::resume(generic_scheduler& target) { // Transfer thread related data. 
target.my_arena_index = my_arena_index; target.my_arena_slot = my_arena_slot; +#if __TBB_SCHEDULER_OBSERVER + target.my_last_global_observer = my_last_global_observer; +#endif +#if __TBB_ARENA_OBSERVER + target.my_last_local_observer = my_last_local_observer; +#endif target.attach_mailbox(affinity_id(target.my_arena_index + 1)); #if __TBB_TASK_PRIORITY diff --git a/src/tbb/task_group_context.cpp b/src/tbb/task_group_context.cpp index d6097251a9..8cab144bab 100644 --- a/src/tbb/task_group_context.cpp +++ b/src/tbb/task_group_context.cpp @@ -207,6 +207,8 @@ void task_group_context::init () { __TBB_ASSERT ( (uintptr_t(this) & (sizeof(my_cancellation_requested) - 1)) == 0, "Context is improperly aligned" ); __TBB_ASSERT ( __TBB_load_relaxed(my_kind) == isolated || __TBB_load_relaxed(my_kind) == bound, "Context can be created only as isolated or bound" ); my_parent = NULL; + my_node.my_next = NULL; + my_node.my_prev = NULL; my_cancellation_requested = 0; my_exception = NULL; my_owner = NULL; diff --git a/src/tbb/tbb_misc.h b/src/tbb/tbb_misc.h index 632a632e52..40bd78bcd4 100644 --- a/src/tbb/tbb_misc.h +++ b/src/tbb/tbb_misc.h @@ -21,6 +21,10 @@ #include "tbb/tbb_machine.h" #include "tbb/atomic.h" // For atomic_xxx definitions +#if __TBB_NUMA_SUPPORT +#include "tbb/info.h" +#endif /*__TBB_NUMA_SUPPORT*/ + #if __linux__ || __FreeBSD__ #include // __FreeBSD_version #if __FreeBSD_version >= 701000 @@ -35,6 +39,12 @@ #define __TBB_USE_OS_AFFINITY_SYSCALL (__TBB_OS_AFFINITY_SYSCALL_PRESENT && !__bg__) namespace tbb { + +#if __TBB_NUMA_SUPPORT +namespace interface7 { class task_arena; } +namespace interface6 { class task_scheduler_observer; } +#endif /*__TBB_NUMA_SUPPORT*/ + namespace internal { const size_t MByte = 1024*1024; @@ -265,6 +275,26 @@ bool cpu_has_speculation(); bool gcc_rethrow_exception_broken(); void fix_broken_rethrow(); +#if __TBB_NUMA_SUPPORT +// Interfaces for binding threads to certain NUMA nodes by third-party library interfaces. +// - construct_binding_observer() returns pointer to constructed and enabled (observe(true) was called) observer +// that binds incoming thread during on_scheduler_entry() call and returns old affinity mask +// during on_scheduler_exit() call +// - destroy_binding_observer() deactivates, destroys and deallocates observer that was described earlier. +// If requested third party library does not exist on the system, then they are still may be called but do nothing. +tbb::interface6::task_scheduler_observer* construct_binding_observer( + tbb::interface7::task_arena* ta, int numa_id, int num_slots ); + +void destroy_binding_observer( tbb::interface6::task_scheduler_observer* observer ); + +namespace numa_topology { + bool is_initialized(); + void initialize(); + void destroy(); +} + +#endif /*__TBB_NUMA_SUPPORT*/ + } // namespace internal } // namespace tbb diff --git a/src/tbb/tbbbind.cpp b/src/tbb/tbbbind.cpp new file mode 100644 index 0000000000..05420a29f9 --- /dev/null +++ b/src/tbb/tbbbind.cpp @@ -0,0 +1,309 @@ +/* + Copyright (c) 2005-2019 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+    See the License for the specific language governing permissions and
+    limitations under the License.
+*/
+
+#include "tbb/tbb_stddef.h" // For correct linking with TBB on Windows
+
+#include "tbb/task_arena.h"
+#include "tbb/task_scheduler_observer.h"
+#include "tbb/tbb_allocator.h"
+
+#if _MSC_VER && !__INTEL_COMPILER
+#pragma warning( push )
+#pragma warning( disable : 4100 )
+#endif
+#include <hwloc.h>
+#if _MSC_VER && !__INTEL_COMPILER
+#pragma warning( pop )
+#endif
+
+#include <vector>
+
+// Most hwloc calls return a negative exit code on error.
+// This macro checks the error codes returned from the hwloc interfaces.
+#define assertion_hwloc_wrapper(command, ...) \
+    __TBB_ASSERT_EX( (command(__VA_ARGS__)) >= 0, "Error occurred during call to hwloc API.");
+
+namespace tbb {
+namespace internal {
+
+//------------------------------------------------------------------------
+// Information about the machine's hardware that TBB happens to run on
+//------------------------------------------------------------------------
+class platform_topology {
+    friend class numa_affinity_handler;
+
+    static hwloc_topology_t topology;
+    static hwloc_cpuset_t   process_cpu_affinity_mask;
+    static hwloc_nodeset_t  process_node_affinity_mask;
+    static std::vector<hwloc_cpuset_t> affinity_masks_list;
+
+    static std::vector<int> default_concurrency_list;
+    static std::vector<int> numa_indexes_list;
+    static int numa_nodes_count;
+
+    enum init_stages { uninitialized, started, topology_allocated, topology_loaded, topology_parsed };
+    static init_stages initialization_state;
+
+    // Binding threads to NUMA nodes located in other Windows processor groups
+    // is allowed only if the machine topology contains several Windows processor groups
+    // and the process affinity mask was not limited manually (an affinity mask cannot
+    // violate processor group boundaries).
+    static bool intergroup_binding_allowed(size_t groups_num) { return groups_num > 1; }
+
+public:
+    typedef hwloc_cpuset_t             affinity_mask;
+    typedef hwloc_const_cpuset_t const_affinity_mask;
+
+    static bool is_topology_parsed() { return initialization_state == topology_parsed; }
+
+    static void initialize( size_t groups_num ) {
+        if ( initialization_state != uninitialized )
+            return;
+        initialization_state = started;
+
+        // Parse the topology
+        if ( hwloc_topology_init( &topology ) == 0 ) {
+            initialization_state = topology_allocated;
+            if ( hwloc_topology_load( topology ) == 0 ) {
+                initialization_state = topology_loaded;
+            }
+        }
+
+        // Fill the parameters with stubs if topology parsing failed.
+        if ( initialization_state != topology_loaded ) {
+            if ( initialization_state == topology_allocated ) {
+                hwloc_topology_destroy(topology);
+            }
+            numa_nodes_count = 1;
+            numa_indexes_list.push_back(-1);
+            default_concurrency_list.push_back(-1);
+            return;
+        }
+
+        // Get the process affinity mask
+        if ( intergroup_binding_allowed(groups_num) ) {
+            process_cpu_affinity_mask  = hwloc_bitmap_dup(hwloc_topology_get_complete_cpuset (topology));
+            process_node_affinity_mask = hwloc_bitmap_dup(hwloc_topology_get_complete_nodeset(topology));
+        } else {
+            process_cpu_affinity_mask  = hwloc_bitmap_alloc();
+            process_node_affinity_mask = hwloc_bitmap_alloc();
+
+            assertion_hwloc_wrapper(hwloc_get_cpubind, topology, process_cpu_affinity_mask, 0);
+            hwloc_cpuset_to_nodeset(topology, process_cpu_affinity_mask, process_node_affinity_mask);
+        }
+
+        // If the system contains no NUMA nodes, HWLOC 1.11 returns an infinitely filled bitmap.
+        // hwloc_bitmap_weight() returns a negative value for such bitmaps, so we use this check
+        // to take the single-node topology initialization path instead.
+        if (hwloc_bitmap_weight(process_node_affinity_mask) < 0) {
+            numa_nodes_count = 1;
+            numa_indexes_list.push_back(0);
+            default_concurrency_list.push_back(hwloc_bitmap_weight(process_cpu_affinity_mask));
+
+            affinity_masks_list.push_back(hwloc_bitmap_dup(process_cpu_affinity_mask));
+            initialization_state = topology_parsed;
+            return;
+        }
+
+        // Get the number of available NUMA nodes
+        numa_nodes_count = hwloc_bitmap_weight(process_node_affinity_mask);
+        __TBB_ASSERT(numa_nodes_count > 0, "Any system must contain one or more NUMA nodes");
+
+        // Get the list of NUMA logical indexes
+        unsigned counter = 0;
+        int i = 0;
+        int max_numa_index = -1;
+        numa_indexes_list.resize(numa_nodes_count);
+        hwloc_obj_t node_buffer;
+        hwloc_bitmap_foreach_begin(i, process_node_affinity_mask) {
+            node_buffer = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, i);
+            numa_indexes_list[counter] = static_cast<int>(node_buffer->logical_index);
+
+            if ( numa_indexes_list[counter] > max_numa_index ) {
+                max_numa_index = numa_indexes_list[counter];
+            }
+
+            counter++;
+        } hwloc_bitmap_foreach_end();
+        __TBB_ASSERT(max_numa_index >= 0, "The maximum NUMA index must not be negative");
+
+        // Fill the concurrency and affinity mask lists
+        default_concurrency_list.resize(max_numa_index + 1);
+        affinity_masks_list.resize(max_numa_index + 1);
+
+        int index = 0;
+        hwloc_bitmap_foreach_begin(i, process_node_affinity_mask) {
+            node_buffer = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, i);
+            index = static_cast<int>(node_buffer->logical_index);
+
+            hwloc_cpuset_t& current_mask = affinity_masks_list[index];
+            current_mask = hwloc_bitmap_dup(node_buffer->cpuset);
+
+            hwloc_bitmap_and(current_mask, current_mask, process_cpu_affinity_mask);
+            __TBB_ASSERT(!hwloc_bitmap_iszero(current_mask), "hwloc detected an unavailable NUMA node");
+            default_concurrency_list[index] = hwloc_bitmap_weight(current_mask);
+        } hwloc_bitmap_foreach_end();
+        initialization_state = topology_parsed;
+    }
+
+    ~platform_topology() {
+        if ( is_topology_parsed() ) {
+            for (int i = 0; i < numa_nodes_count; i++) {
+                hwloc_bitmap_free(affinity_masks_list[numa_indexes_list[i]]);
+            }
+            hwloc_bitmap_free(process_node_affinity_mask);
+            hwloc_bitmap_free(process_cpu_affinity_mask);
+        }
+
+        if ( initialization_state >= topology_allocated ) {
+            hwloc_topology_destroy(topology);
+        }
+
+        initialization_state = uninitialized;
+    }
+
+    static void fill( int& nodes_count, int*& indexes_list, int*& concurrency_list ) {
+        __TBB_ASSERT(is_topology_parsed(), "Trying to get access to uninitialized platform_topology");
+        nodes_count = numa_nodes_count;
+        indexes_list = &numa_indexes_list.front();
+        concurrency_list = &default_concurrency_list.front();
+    }
+
+    static affinity_mask allocate_process_affinity_mask() {
+        __TBB_ASSERT(is_topology_parsed(), "Trying to get access to uninitialized platform_topology");
+        return hwloc_bitmap_dup(process_cpu_affinity_mask);
+    }
+
+    static void free_affinity_mask( affinity_mask mask_to_free ) {
+        hwloc_bitmap_free(mask_to_free); // If the bitmap is NULL, no operation is performed.
+    }
+
+    static void store_current_affinity_mask( affinity_mask current_mask ) {
+        assertion_hwloc_wrapper(hwloc_get_cpubind, topology, current_mask, HWLOC_CPUBIND_THREAD);
+
+        hwloc_bitmap_and(current_mask, current_mask, process_cpu_affinity_mask);
+        __TBB_ASSERT(!hwloc_bitmap_iszero(current_mask),
+            "The current affinity mask must intersect the process affinity mask");
+    }
+
+    static void set_new_affinity_mask( const_affinity_mask new_mask ) {
+        assertion_hwloc_wrapper(hwloc_set_cpubind, topology, new_mask, HWLOC_CPUBIND_THREAD);
+    }
+
+    static const_affinity_mask get_node_affinity_mask( int node_index ) {
+        __TBB_ASSERT((int)affinity_masks_list.size() > node_index,
+            "Trying to get the affinity mask for an uninitialized NUMA node");
+        return affinity_masks_list[node_index];
+    }
+};
+
+hwloc_topology_t platform_topology::topology = NULL;
+hwloc_cpuset_t   platform_topology::process_cpu_affinity_mask = NULL;
+hwloc_nodeset_t  platform_topology::process_node_affinity_mask = NULL;
+std::vector<hwloc_cpuset_t> platform_topology::affinity_masks_list;
+
+std::vector<int> platform_topology::default_concurrency_list;
+std::vector<int> platform_topology::numa_indexes_list;
+int platform_topology::numa_nodes_count = 0;
+
+platform_topology::init_stages platform_topology::initialization_state = uninitialized;
+
+class numa_affinity_handler {
+    // The following vector saves a thread's affinity mask on scheduler entry so that
+    // it can be restored for that thread on scheduler exit.
+    typedef std::vector<platform_topology::affinity_mask> affinity_masks_container;
+    affinity_masks_container affinity_backup;
+
+public:
+    numa_affinity_handler( size_t size ) : affinity_backup(size) {
+        for (affinity_masks_container::iterator it = affinity_backup.begin();
+             it != affinity_backup.end(); it++) {
+            *it = platform_topology::allocate_process_affinity_mask();
+        }
+    }
+
+    ~numa_affinity_handler() {
+        for (affinity_masks_container::iterator it = affinity_backup.begin();
+             it != affinity_backup.end(); it++) {
+            platform_topology::free_affinity_mask(*it);
+        }
+    }
+
+    void bind_thread_to_node( unsigned slot_num, unsigned numa_node_id ) {
+        __TBB_ASSERT(slot_num < affinity_backup.size(),
+            "The slot number is greater than the number of slots in the arena");
+        __TBB_ASSERT(platform_topology::is_topology_parsed(),
+            "Trying to get access to uninitialized platform_topology");
+        platform_topology::store_current_affinity_mask(affinity_backup[slot_num]);
+
+        platform_topology::set_new_affinity_mask(
+            platform_topology::get_node_affinity_mask(numa_node_id));
+    }
+
+    void restore_previous_affinity_mask( unsigned slot_num ) {
+        __TBB_ASSERT(platform_topology::is_topology_parsed(),
+            "Trying to get access to uninitialized platform_topology");
+        platform_topology::set_new_affinity_mask(affinity_backup[slot_num]);
+    }
+
+};
+
+class numa_binding_observer : public tbb::task_scheduler_observer {
+    int my_numa_node_id;
+    numa_affinity_handler numa_handler;
+public:
+    numa_binding_observer( task_arena* ta, int numa_id, int num_slots )
+        : task_scheduler_observer(*ta)
+        , my_numa_node_id(numa_id)
+        , numa_handler(num_slots)
+    {}
+
+    void on_scheduler_entry( bool ) __TBB_override {
+        numa_handler.bind_thread_to_node(this_task_arena::current_thread_index(), my_numa_node_id);
+    }
+
+    void on_scheduler_exit( bool ) __TBB_override {
+        numa_handler.restore_previous_affinity_mask(this_task_arena::current_thread_index());
+    }
+};
+
+extern "C" { // exported to TBB interfaces
+
+void initialize_numa_topology( size_t groups_num,
+                               int& nodes_count, int*& indexes_list, int*& concurrency_list ) {
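+    // The out-parameters expose the parsed topology to the TBB core: nodes_count receives
+    // the number of observed NUMA nodes, indexes_list points to their logical indexes, and
+    // concurrency_list holds the default concurrency available on each node.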
+    platform_topology::initialize(groups_num);
+    platform_topology::fill(nodes_count, indexes_list, concurrency_list);
+}
+
+task_scheduler_observer* subscribe_arena( task_arena* ta, int numa_id, int num_slots ) {
+    task_scheduler_observer* binding_observer = new numa_binding_observer(ta, numa_id, num_slots);
+    __TBB_ASSERT(binding_observer, "Failure during NUMA binding observer allocation and construction");
+    binding_observer->observe(true);
+    return binding_observer;
+}
+
+void unsubscribe_arena( task_scheduler_observer* binding_observer ) {
+    __TBB_ASSERT(binding_observer, "Trying to deallocate a NULL pointer");
+    binding_observer->observe(false);
+    delete binding_observer;
+}
+
+} // extern "C"
+
+} // namespace internal
+} // namespace tbb
+
+#undef assertion_hwloc_wrapper
diff --git a/src/tbb/win32-tbb-export.lst b/src/tbb/win32-tbb-export.lst
index 03bc710c54..42eb7602b4 100644
--- a/src/tbb/win32-tbb-export.lst
+++ b/src/tbb/win32-tbb-export.lst
@@ -77,6 +77,11 @@ __TBB_SYMBOL( ?internal_attach@task_arena_base@internal@interface7@tbb@@IAEXXZ )
 __TBB_SYMBOL( ?internal_enqueue@task_arena_base@internal@interface7@tbb@@IBEXAAVtask@4@H@Z )
 __TBB_SYMBOL( ?internal_execute@task_arena_base@internal@interface7@tbb@@IBEXAAVdelegate_base@234@@Z )
 __TBB_SYMBOL( ?internal_wait@task_arena_base@internal@interface7@tbb@@IBEXXZ )
+#if __TBB_NUMA_SUPPORT
+__TBB_SYMBOL( ?nodes_count@numa_topology@internal@tbb@@YAIXZ )
+__TBB_SYMBOL( ?fill@numa_topology@internal@tbb@@YAXPAH@Z )
+__TBB_SYMBOL( ?default_concurrency@numa_topology@internal@tbb@@YAHH@Z )
+#endif /*__TBB_NUMA_SUPPORT*/
 #if __TBB_TASK_ISOLATION
 __TBB_SYMBOL( ?isolate_within_arena@internal@interface7@tbb@@YAXAAVdelegate_base@123@H@Z )
 #endif /* __TBB_TASK_ISOLATION */
diff --git a/src/tbb/win32-tbbbind-export.def b/src/tbb/win32-tbbbind-export.def
new file mode 100644
index 0000000000..0400f65199
--- /dev/null
+++ b/src/tbb/win32-tbbbind-export.def
@@ -0,0 +1,19 @@
+; Copyright (c) 2005-2019 Intel Corporation
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+;     http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+
+EXPORTS
+
+initialize_numa_topology
+subscribe_arena
+unsubscribe_arena
diff --git a/src/tbb/win64-gcc-tbb-export.lst b/src/tbb/win64-gcc-tbb-export.lst
index b48ae31cbe..1a3d49bd91 100644
--- a/src/tbb/win64-gcc-tbb-export.lst
+++ b/src/tbb/win64-gcc-tbb-export.lst
@@ -68,6 +68,11 @@ __TBB_SYMBOL( _ZNK3tbb10interface78internal15task_arena_base16internal_executeER
 __TBB_SYMBOL( _ZNK3tbb10interface78internal15task_arena_base13internal_waitEv )
 __TBB_SYMBOL( _ZN3tbb10interface78internal15task_arena_base21internal_current_slotEv )
 __TBB_SYMBOL( _ZN3tbb10interface78internal15task_arena_base24internal_max_concurrencyEPKNS0_10task_arenaE )
+#if __TBB_NUMA_SUPPORT
+__TBB_SYMBOL( _ZN3tbb8internal13numa_topology11nodes_countEv )
+__TBB_SYMBOL( _ZN3tbb8internal13numa_topology4fillEPi )
+__TBB_SYMBOL( _ZN3tbb8internal13numa_topology19default_concurrencyEi )
+#endif /*__TBB_NUMA_SUPPORT*/
 #if __TBB_TASK_ISOLATION
 __TBB_SYMBOL( _ZN3tbb10interface78internal20isolate_within_arenaERNS1_13delegate_baseEx )
 #endif /* __TBB_TASK_ISOLATION */
diff --git a/src/tbb/win64-tbb-export.lst b/src/tbb/win64-tbb-export.lst
index d28ca54c3d..1ba741bc18 100644
--- a/src/tbb/win64-tbb-export.lst
+++ b/src/tbb/win64-tbb-export.lst
@@ -73,6 +73,11 @@ __TBB_SYMBOL( ?internal_attach@task_arena_base@internal@interface7@tbb@@IEAAXXZ
 __TBB_SYMBOL( ?internal_enqueue@task_arena_base@internal@interface7@tbb@@IEBAXAEAVtask@4@_J@Z )
 __TBB_SYMBOL( ?internal_execute@task_arena_base@internal@interface7@tbb@@IEBAXAEAVdelegate_base@234@@Z )
 __TBB_SYMBOL( ?internal_wait@task_arena_base@internal@interface7@tbb@@IEBAXXZ )
+#if __TBB_NUMA_SUPPORT
+__TBB_SYMBOL( ?nodes_count@numa_topology@internal@tbb@@YAIXZ )
+__TBB_SYMBOL( ?fill@numa_topology@internal@tbb@@YAXPEAH@Z )
+__TBB_SYMBOL( ?default_concurrency@numa_topology@internal@tbb@@YAHH@Z )
+#endif /*__TBB_NUMA_SUPPORT*/
 #if __TBB_TASK_ISOLATION
 __TBB_SYMBOL( ?isolate_within_arena@internal@interface7@tbb@@YAXAEAVdelegate_base@123@_J@Z )
 #endif /* __TBB_TASK_ISOLATION */
diff --git a/src/tbb/win64-tbbbind-export.def b/src/tbb/win64-tbbbind-export.def
new file mode 100644
index 0000000000..0400f65199
--- /dev/null
+++ b/src/tbb/win64-tbbbind-export.def
@@ -0,0 +1,19 @@
+; Copyright (c) 2005-2019 Intel Corporation
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+;     http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+
+EXPORTS
+
+initialize_numa_topology
+subscribe_arena
+unsubscribe_arena
diff --git a/src/test/harness.h b/src/test/harness.h
index 20a5d19075..5540af8332 100644
--- a/src/test/harness.h
+++ b/src/test/harness.h
@@ -58,6 +58,8 @@ int TestMain ();
 #include <cstdlib>
 #include <cstring>
 #endif /* !__SUNPRO_CC */
+#include <cerrno>
+#include <cctype>
 #include <new>
@@ -841,6 +843,32 @@ class NoAfterlife {
 #endif
     }

+    long GetIntEnv( const char * envname ) {
+        ASSERT(envname, "Harness::GetIntEnv() requires a valid C string");
+#if !__TBB_WIN8UI_SUPPORT
+        if ( const char* s = std::getenv(envname) ) {
+            char* end = NULL;
+            errno = 0;
+            long value = std::strtol(s, &end, 10);
+
+            // The range was exceeded, the value is negative, or the string is not convertible
+            if ( errno == ERANGE || value < 0 || end == s )
+            {
+                return -1;
+            }
+
+            for ( ; *end != '\0'; end++ )
+            {
+                if ( !std::isspace(*end) )
+                    return -1;
+            }
+
+            return value;
+        }
+#endif
+        return -1;
+    }
+
     class DummyBody {
         int m_numIters;
     public:
diff --git a/src/test/test_aggregator.cpp b/src/test/test_aggregator.cpp
index 601ab4ff38..dbcdd2cdb2 100644
--- a/src/test/test_aggregator.cpp
+++ b/src/test/test_aggregator.cpp
@@ -132,10 +132,9 @@ class my_handler {
 };

 class ExpertBody : NoAssign {
-    pq_t& pq;
     tbb::aggregator_ext<my_handler>& agg;
 public:
-    ExpertBody(pq_t& pq_, tbb::aggregator_ext<my_handler>& agg_) : pq(pq_), agg(agg_) {}
+    ExpertBody(tbb::aggregator_ext<my_handler>& agg_) : agg(agg_) {}
     void operator()(const int threadID) const {
         for (int i=0; i
     tbb::aggregator_ext<my_handler> agg((my_handler(&my_pq)));
     for (int i=0; i
+
+#if __TBB_CPP11_PRESENT
+#include
+#endif /*__TBB_CPP11_PRESENT*/
+
+#if _MSC_VER
+#pragma warning( push )
+#pragma warning( disable : 4100 )
+#endif
+#include <hwloc.h>
+#if _MSC_VER
+#pragma warning( pop )
+#endif
+
+#include "tbb/concurrent_unordered_set.h"
+#include "tbb/parallel_for.h"
+
+// Macro to check hwloc interface return codes
+#define hwloc_assert_ex(command, ...) \
+    ASSERT(command(__VA_ARGS__) >= 0, "Error occurred inside hwloc call.");
+
+namespace numa_validation {
+    namespace {
+        class system_info_t {
+            hwloc_topology_t topology;
+
+            hwloc_nodeset_t process_node_set;
+            hwloc_cpuset_t  process_cpu_set;
+
+            hwloc_cpuset_t buffer_cpu_set;
+            hwloc_cpuset_t buffer_node_set;
+
+            // hwloc_cpuset_t and hwloc_nodeset_t (inherited from hwloc_bitmap_t) are pointers,
+            // so we must manage their allocation and deallocation ourselves
+            typedef tbb::concurrent_unordered_set<hwloc_bitmap_t> memory_handler_t;
+            memory_handler_t memory_handler;
+
+            bool is_initialized;
+        public:
+            system_info_t() : memory_handler() {
+                is_initialized = false;
+            }
+
+            void initialize() {
+                if (is_initialized) return;
+
+                hwloc_assert_ex(hwloc_topology_init, &topology);
+                hwloc_assert_ex(hwloc_topology_load, topology);
+
+                if ( Harness::GetIntEnv("NUMBER_OF_PROCESSORS_GROUPS") > 1 ) {
+                    process_cpu_set  = hwloc_bitmap_dup(hwloc_topology_get_complete_cpuset (topology));
+                    process_node_set = hwloc_bitmap_dup(hwloc_topology_get_complete_nodeset(topology));
+                } else {
+                    process_cpu_set  = hwloc_bitmap_alloc();
+                    process_node_set = hwloc_bitmap_alloc();
+
+                    hwloc_assert_ex(hwloc_get_cpubind, topology, process_cpu_set, 0);
+                    hwloc_cpuset_to_nodeset(topology, process_cpu_set, process_node_set);
+                }
+
+                // If the system contains no NUMA nodes, HWLOC 1.11 returns an infinitely filled bitmap.
+                // hwloc_bitmap_weight() returns a negative value for such bitmaps, so we use this check
+                // to work around this case.
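+                // Pinning the node set to node 0 below lets the rest of the test treat such
+                // machines as single-node systems.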
+ if (hwloc_bitmap_weight(process_node_set) <= 0) { + hwloc_bitmap_only(process_node_set, 0); + } + +// Debug macros for test topology parser validation +#if NUMBER_OF_NUMA_NODES + ASSERT(hwloc_bitmap_weight(process_node_set) == NUMBER_OF_NUMA_NODES, + "Manual NUMA nodes count check."); +#endif /*NUMBER_OF_NUMA_NODES*/ + + buffer_cpu_set = hwloc_bitmap_alloc(); + buffer_node_set = hwloc_bitmap_alloc(); + + is_initialized = true; + } + + ~system_info_t() { + if (is_initialized) { + for (memory_handler_t::iterator it = memory_handler.begin(); + it != memory_handler.end(); it++) { + hwloc_bitmap_free(*it); + } + hwloc_bitmap_free(process_cpu_set); + hwloc_bitmap_free(process_node_set); + hwloc_bitmap_free(buffer_cpu_set); + hwloc_bitmap_free(buffer_node_set); + + hwloc_topology_destroy(topology); + } + } + + hwloc_bitmap_t allocate_empty_affinity_mask() { + __TBB_ASSERT(is_initialized, "Call of uninitialized system_info"); + hwloc_bitmap_t result = hwloc_bitmap_alloc(); + memory_handler.insert(result); + return result; + } + + hwloc_cpuset_t allocate_current_cpu_set() { + __TBB_ASSERT(is_initialized, "Call of uninitialized system_info"); + hwloc_cpuset_t current_affinity_mask = allocate_empty_affinity_mask(); + hwloc_assert_ex(hwloc_get_cpubind, topology, current_affinity_mask, HWLOC_CPUBIND_THREAD ); + ASSERT(!hwloc_bitmap_iszero(current_affinity_mask), "Empty current affinity mask."); + return current_affinity_mask; + } + + hwloc_const_cpuset_t get_process_cpu_set() { + __TBB_ASSERT(is_initialized, "Call of uninitialized system_info"); + return process_cpu_set; + } + + hwloc_const_nodeset_t get_process_node_set() { + __TBB_ASSERT(is_initialized, "Call of uninitialized system_info"); + return process_node_set; + } + + int numa_node_max_concurrency(int index) { + __TBB_ASSERT(is_initialized, "Call of uninitialized system_info"); + hwloc_bitmap_only(buffer_node_set, index); + hwloc_cpuset_from_nodeset(topology, buffer_cpu_set, buffer_node_set); + hwloc_bitmap_and(buffer_cpu_set, buffer_cpu_set, process_cpu_set); + ASSERT(hwloc_bitmap_weight(buffer_cpu_set) > 0, "Negative concurrency."); + return hwloc_bitmap_weight(buffer_cpu_set); + } + }; + + static system_info_t system_info; + } /*internal namespace*/ + +typedef hwloc_bitmap_t affinity_mask; +typedef hwloc_const_bitmap_t const_affinity_mask; + +void initialize_system_info() { system_info.initialize(); } + +affinity_mask allocate_current_cpu_set() { + return system_info.allocate_current_cpu_set(); +} + +bool affinity_masks_isequal(const_affinity_mask first, const_affinity_mask second) { + return hwloc_bitmap_isequal(first, second) ? true : false; +} + +bool affinity_masks_intersects(const_affinity_mask first, const_affinity_mask second) { + return hwloc_bitmap_intersects(first, second) ? 
true : false;
+}
+
+void validate_topology_information(std::vector<int> numa_indexes) {
+    // Generate the available NUMA nodes bitmap
+    const_affinity_mask process_node_set = system_info.get_process_node_set();
+
+    // Parse the input index list into a NUMA nodes bitmap
+    affinity_mask merged_input_node_set = system_info.allocate_empty_affinity_mask();
+    int whole_system_concurrency = 0;
+    for (unsigned i = 0; i < numa_indexes.size(); i++) {
+        ASSERT(!hwloc_bitmap_isset(merged_input_node_set, numa_indexes[i]), "Indices are repeated.");
+        hwloc_bitmap_set(merged_input_node_set, numa_indexes[i]);
+
+        ASSERT(tbb::info::default_concurrency(numa_indexes[i]) ==
+               system_info.numa_node_max_concurrency(numa_indexes[i]),
+               "Wrong default concurrency value.");
+        whole_system_concurrency += tbb::info::default_concurrency(numa_indexes[i]);
+    }
+
+    ASSERT(whole_system_concurrency == tbb::task_scheduler_init::default_num_threads(),
+           "Wrong whole-system default concurrency level.");
+    ASSERT(affinity_masks_isequal(process_node_set, merged_input_node_set),
+           "The input array of indices is not equal to the process NUMA node set.");
+}
+
+} /*namespace numa_validation*/
+
+#if __TBB_CPP11_PRESENT
+namespace numa_validation {
+    template <typename It>
+    typename std::enable_if<std::is_same<typename std::iterator_traits<It>::value_type, affinity_mask>::value, void>::
+    type affinity_set_verification(It begin, It end) {
+        affinity_mask buffer_mask = system_info.allocate_empty_affinity_mask();
+        for (auto it = begin; it != end; it++) {
+            ASSERT(!hwloc_bitmap_intersects(buffer_mask, *it),
+                   "Bitmaps bound to different nodes intersect.");
+            // OR the mask into buffer_mask to accumulate the process affinity mask
+            hwloc_bitmap_or(buffer_mask, buffer_mask, *it);
+        }
+
+        ASSERT(affinity_masks_isequal(system_info.get_process_cpu_set(), buffer_mask),
+               "Some cores were not included in the bitmaps.");
+    }
+} /*namespace numa_validation*/
+
+struct execute_wrapper {
+    template <typename Callable>
+    void emplace_function(tbb::task_arena& ta, Callable functor) {
+        ta.execute(functor);
+    }
+};
+
+struct enqueue_wrapper {
+    template <typename Callable>
+    void emplace_function(tbb::task_arena& ta, Callable functor) {
+        ta.enqueue(functor);
+    }
+};
+
+template <typename It, typename FuncWrapper>
+typename std::enable_if<std::is_same<typename std::iterator_traits<It>::value_type, tbb::task_arena>::value, void>::
+type test_numa_binding_impl(It begin, It end, FuncWrapper wrapper) {
+    tbb::concurrent_unordered_set<numa_validation::affinity_mask> affinity_masks;
+    std::atomic<int> counter(0), expected_count(0);
+
+    auto affinity_mask_checker = [&counter, &affinity_masks]() {
+        affinity_masks.insert(numa_validation::allocate_current_cpu_set());
+        counter++;
+    };
+
+    for (auto it = begin; it != end; it++) {
+        expected_count++;
+        wrapper.emplace_function(*it, affinity_mask_checker);
+    }
+
+    // Wait for all spawned tasks
+    while (counter != expected_count) {}
+    numa_validation::affinity_set_verification(affinity_masks.begin(), affinity_masks.end());
+}
+
+void test_numa_binding(std::vector<int> numa_indexes_vector) {
+
+    std::vector<tbb::task_arena> arenas(numa_indexes_vector.size());
+
+    for (unsigned i = 0; i < numa_indexes_vector.size(); i++) {
+        // Bind each arena to a NUMA node
+        arenas[i].initialize(tbb::task_arena::constraints(numa_indexes_vector[i]));
+    }
+
+    test_numa_binding_impl(arenas.begin(), arenas.end(), execute_wrapper());
+    test_numa_binding_impl(arenas.begin(), arenas.end(), enqueue_wrapper());
+}
+
+void recursive_arena_binding(int*, size_t, std::vector<numa_validation::affinity_mask>&);
+
+void recursive_arena_binding(int* numa_indexes, size_t count,
+                             std::vector<numa_validation::affinity_mask>& affinity_masks) {
+    if (count > 0) {
+        tbb::task_arena current_level_arena;
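+        // Each recursion level binds one more arena to the next NUMA node and stacks the
+        // observed affinity mask so it can be verified while the recursion unwinds.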
+        current_level_arena.initialize(tbb::task_arena::constraints(numa_indexes[count - 1]));
+        current_level_arena.execute(
+            [&numa_indexes, &count, &affinity_masks]() {
+                affinity_masks.push_back(numa_validation::allocate_current_cpu_set());
+                recursive_arena_binding(numa_indexes, --count, affinity_masks);
+            }
+        );
+    } else {
+        // Validate the assigned affinity masks at the deepest recursion step
+        numa_validation::affinity_set_verification(affinity_masks.begin(), affinity_masks.end());
+    }
+
+    if (!affinity_masks.empty()) {
+        ASSERT(numa_validation::affinity_masks_isequal(affinity_masks.back(),
+               numa_validation::allocate_current_cpu_set()),
+               "After binding to a different NUMA node, the thread affinity was not restored to its previous state.");
+        affinity_masks.pop_back();
+    }
+}
+
+void test_nested_numa_binding(std::vector<int> numa_indexes_vector) {
+    std::vector<numa_validation::affinity_mask> affinity_masks;
+    recursive_arena_binding(numa_indexes_vector.data(), numa_indexes_vector.size(), affinity_masks);
+}
+
+void test_memory_leak(std::vector<int> numa_indexes_vector) {
+    size_t big_number = 1000;
+    size_t current_memory_usage = 0, previous_memory_usage = 0, stability_counter = 0;
+    for (size_t i = 0; i < big_number; i++) {
+        { /* All DTORs must be called before the GetMemoryUsage() call */
+            std::vector<tbb::task_arena> arenas(numa_indexes_vector.size());
+            std::vector<Harness::SpinBarrier> barriers(numa_indexes_vector.size());
+
+            for (unsigned j = 0; j < numa_indexes_vector.size(); j++) {
+                arenas[j].initialize(tbb::task_arena::constraints(numa_indexes_vector[j]));
+                barriers[j].initialize(arenas[j].max_concurrency());
+                Harness::SpinBarrier& barrier_ref = barriers[j];
+                arenas[j].enqueue([&barrier_ref]() {
+                    tbb::parallel_for(tbb::blocked_range<int>(0, tbb::this_task_arena::max_concurrency()),
+                        [&barrier_ref](const tbb::blocked_range<int>&) {
+                            barrier_ref.wait();
+                        });
+                });
+            }
+
+            for (unsigned j = 0; j < numa_indexes_vector.size(); j++) {
+                arenas[j].debug_wait_until_empty();
+            }
+        }
+
+        current_memory_usage = GetMemoryUsage();
+        stability_counter = current_memory_usage == previous_memory_usage ? stability_counter + 1 : 0;
+        // If the amount of used memory has not changed over 10% of the executions,
+        // we can assume that the check was successful
+        if (stability_counter > big_number / 10) return;
+        previous_memory_usage = current_memory_usage;
+    }
+    ASSERT(false, "It seems we have a memory leak here.");
+}
+#endif /*__TBB_CPP11_PRESENT*/
+
+int TestMain() {
+#if _WIN32 && !_WIN64
+    // HWLOC cannot process affinity masks on Windows in 32-bit mode if there are more than 32 logical CPUs.
+    SYSTEM_INFO si;
+    GetNativeSystemInfo(&si);
+    if (si.dwNumberOfProcessors > 32) return Harness::Skipped;
+#endif // _WIN32 && !_WIN64
+
+    numa_validation::initialize_system_info();
+
+    std::vector<int> numa_indexes = tbb::info::numa_nodes();
+    numa_validation::validate_topology_information(numa_indexes);
+
+#if __TBB_CPP11_PRESENT
+    test_numa_binding(numa_indexes);
+    test_nested_numa_binding(numa_indexes);
+    test_memory_leak(numa_indexes);
+#endif /*__TBB_CPP11_PRESENT*/
+
+    return Harness::Done;
+}
diff --git a/src/test/test_arena_constraints_stubs.cpp b/src/test/test_arena_constraints_stubs.cpp
new file mode 100644
index 0000000000..f7db60e2ff
--- /dev/null
+++ b/src/test/test_arena_constraints_stubs.cpp
@@ -0,0 +1,41 @@
+/*
+    Copyright (c) 2019 Intel Corporation
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+*/
+#define TBB_PREVIEW_NUMA_SUPPORT 1
+#include "tbb/tbb_config.h"
+
+#include "harness.h"
+
+#include "tbb/task_arena.h"
+#include "tbb/task_scheduler_init.h"
+
+#include <vector>
+
+void test_stubs(std::vector<int> numa_indexes) {
+    ASSERT(numa_indexes.size() == 1, "The number of NUMA nodes must be pinned to 1"
+           " if there is no HWLOC on the system.");
+    ASSERT(numa_indexes[0] == -1, "The index of the NUMA node must be pinned to -1"
+           " if there is no HWLOC on the system.");
+    ASSERT(tbb::info::default_concurrency(numa_indexes[0]) == tbb::task_scheduler_init::default_num_threads(),
+           "The concurrency for the NUMA node must be equal to default_num_threads()"
+           " if there is no HWLOC on the system.");
+}
+
+int TestMain() {
+    std::vector<int> numa_indexes = tbb::info::numa_nodes();
+    test_stubs(numa_indexes);
+
+    return Harness::Done;
+}
diff --git a/src/test/test_concurrent_associative_common.h b/src/test/test_concurrent_associative_common.h
index 4438792c30..abc5490dab 100644
--- a/src/test/test_concurrent_associative_common.h
+++ b/src/test/test_concurrent_associative_common.h
@@ -245,10 +245,10 @@ void MultiMapEraseTests(){
     typename MultiMap::iterator erased_it;
     for (int i = 0; i < 10; ++i) {
         if ( i != 1 ) {
-            cont1.emplace(1, i);
-            cont2.emplace(1, i);
+            cont1.insert(std::make_pair(1, i));
+            cont2.insert(std::make_pair(1, i));
         } else {
-            erased_it = cont1.emplace(1, i).first;
+            erased_it = cont1.insert(std::make_pair(1, i)).first;
         }
     }
diff --git a/src/test/test_join_node.cpp b/src/test/test_join_node.cpp
index be65ee51c0..2e21526ad1 100644
--- a/src/test/test_join_node.cpp
+++ b/src/test/test_join_node.cpp
@@ -187,5 +187,6 @@ int TestMain() {
     test_main();
     test_main();
     test_main();
+    generate_recirc_test >::do_test();
     return Harness::Done;
 }
diff --git a/src/test/test_resumable_tasks.cpp b/src/test/test_resumable_tasks.cpp
index d1840fa6e2..7812be1180 100644
--- a/src/test/test_resumable_tasks.cpp
+++ b/src/test/test_resumable_tasks.cpp
@@ -15,6 +15,7 @@
 */

 #include "tbb/tbb_config.h"
+#include "tbb/task_scheduler_observer.h"

 #include "harness.h"

@@ -363,6 +364,43 @@ void TestNativeThread() {
     });
     ASSERT(ets.local() == true, NULL);
 }
+
+class ObserverTracker : public tbb::task_scheduler_observer {
+    tbb::enumerable_thread_specific<bool> is_in_arena;
+public:
+    tbb::atomic<int> counter;
+
+    ObserverTracker(tbb::task_arena& a) : tbb::task_scheduler_observer(a) {
+        counter = 0;
+        observe(true);
+    }
+    void on_scheduler_entry(bool) __TBB_override {
+        bool& l = is_in_arena.local();
+        ASSERT(l == false, "The thread must call on_scheduler_entry only once.");
+        l = true;
+        ++counter;
+    }
+    void on_scheduler_exit(bool) __TBB_override {
+        bool& l = is_in_arena.local();
+        ASSERT(l == true, "The thread must call on_scheduler_entry before calling on_scheduler_exit.");
+        l = false;
+    }
+};
+
+void TestObservers() {
+    tbb::task_arena arena;
+    ObserverTracker tracker(arena);
+    do {
+        arena.execute([] {
+            tbb::parallel_for(0, 10, [](int) {
+                tbb::task::suspend([](tbb::task::suspend_point tag) {
+                    tbb::task::resume(tag);
+                });
+            }, tbb::simple_partitioner());
+        });
+    } while (tracker.counter < 100);
+    tracker.observe(false);
+}
 #endif

 int TestMain()
 {
@@ -379,6 +417,7 @@ int TestMain()
     TestCleanupMaster();
     TestPriorities();
     TestNativeThread();
+    TestObservers();
 #endif
     ASSERT(ets.local() == true, NULL);
     return Harness::Done;
diff --git a/src/test/test_source_node.cpp b/src/test/test_source_node.cpp
index 2f064f6bdc..4b043edb82 100644
--- a/src/test/test_source_node.cpp
+++ b/src/test/test_source_node.cpp
@@ -413,13 +413,13 @@ void test_deduction_guides() {
     auto non_const_lambda = [](int& i) mutable { return i > 5; };

     // Tests for source_node(graph&, Body)
-    source_node s1(g, lambda);
+    source_node s1(g, lambda, /*is_active=*/false);
     static_assert(std::is_same_v<decltype(s1), source_node<int>>);

-    source_node s2(g, non_const_lambda);
+    source_node s2(g, non_const_lambda, /*is_active=*/false);
     static_assert(std::is_same_v<decltype(s2), source_node<int>>);

-    source_node s3(g, source_body_f);
+    source_node s3(g, source_body_f, /*is_active=*/false);
     static_assert(std::is_same_v<decltype(s3), source_node<int>>);

     source_node s4(s3);
@@ -429,13 +429,13 @@ void test_deduction_guides() {
     broadcast_node<int> bc(g);

     // Tests for source_node(const node_set&, Body)
-    source_node s5(precedes(bc), lambda);
+    source_node s5(precedes(bc), lambda, /*is_active=*/false);
     static_assert(std::is_same_v<decltype(s5), source_node<int>>);

-    source_node s6(precedes(bc), non_const_lambda);
+    source_node s6(precedes(bc), non_const_lambda, /*is_active=*/false);
     static_assert(std::is_same_v<decltype(s6), source_node<int>>);

-    source_node s7(precedes(bc), source_body_f);
+    source_node s7(precedes(bc), source_body_f, /*is_active=*/false);
     static_assert(std::is_same_v<decltype(s7), source_node<int>>);
 #endif
     g.wait_for_all();
diff --git a/src/test/test_tbb_version.cpp b/src/test/test_tbb_version.cpp
index b98ce3d20f..2eebe8b808 100644
--- a/src/test/test_tbb_version.cpp
+++ b/src/test/test_tbb_version.cpp
@@ -224,8 +224,8 @@ int main(int argc, char *argv[] ) {

 // Fill the dictionary with version strings for the platforms
 void initialize_strings_vector(std::vector <string_pair>* vector)
 {
-    vector->push_back(string_pair("TBB: VERSION\t\t2019.9", required));        // check TBB_VERSION
-    vector->push_back(string_pair("TBB: INTERFACE VERSION\t11009", required)); // check TBB_INTERFACE_VERSION
+    vector->push_back(string_pair("TBB: VERSION\t\t2020.0", required));        // check TBB_VERSION
+    vector->push_back(string_pair("TBB: INTERFACE VERSION\t11100", required)); // check TBB_INTERFACE_VERSION
     vector->push_back(string_pair("TBB: BUILD_DATE", required));
     vector->push_back(string_pair("TBB: BUILD_HOST", required));
     vector->push_back(string_pair("TBB: BUILD_OS", required));
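
For reference, a minimal usage sketch of the NUMA-aware task_arena extension introduced by
this patch, assuming TBB_PREVIEW_NUMA_SUPPORT is defined and the library was built with the
tbbbind/HWLOC support added above. The pattern mirrors the tests in
test_arena_constraints_hwloc.cpp; it is a sketch, not part of the commit:

    #define TBB_PREVIEW_NUMA_SUPPORT 1
    #include "tbb/info.h"
    #include "tbb/task_arena.h"
    #include "tbb/task_group.h"

    #include <vector>

    int main() {
        // Query the NUMA nodes visible to the process; without HWLOC this falls back to {-1}.
        std::vector<int> numa_indexes = tbb::info::numa_nodes();

        std::vector<tbb::task_arena> arenas(numa_indexes.size());
        std::vector<tbb::task_group> task_groups(numa_indexes.size());

        for (unsigned i = 0; i < numa_indexes.size(); i++) {
            // Constrain each arena's worker threads to one NUMA node.
            arenas[i].initialize(tbb::task_arena::constraints(numa_indexes[i]));
            arenas[i].execute([&task_groups, i]() {
                task_groups[i].run([]() { /* node-local parallel work */ });
            });
        }

        // Wait for completion inside each arena so the work stays node-local.
        for (unsigned i = 0; i < numa_indexes.size(); i++) {
            arenas[i].execute([&task_groups, i]() { task_groups[i].wait(); });
        }
        return 0;
    }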