diff --git a/doc/GSG/get_started.rst b/doc/GSG/get_started.rst
index ec41123e4c..ed69b022e7 100644
--- a/doc/GSG/get_started.rst
+++ b/doc/GSG/get_started.rst
@@ -9,6 +9,8 @@ Get Started with |short_name|
 
 .. include:: before_beginning_and_example.rst
 
+.. include:: hybrid_cpu_support.rst
+
 Find more
 *********
 
diff --git a/doc/GSG/hybrid_cpu_support.rst b/doc/GSG/hybrid_cpu_support.rst
new file mode 100644
index 0000000000..a2b32f12b4
--- /dev/null
+++ b/doc/GSG/hybrid_cpu_support.rst
@@ -0,0 +1,40 @@
+.. _hybrid_cpu_support:
+
+Hybrid CPU and NUMA Support
+***************************
+
+If you need NUMA/Hybrid CPU support in oneTBB, you need to make sure that HWLOC* is installed on your system.
+
+HWLOC* (Hardware Locality) is a library that provides a portable abstraction of the hierarchical topology of modern architectures (NUMA, hybrid CPU systems, etc.).
+oneTBB relies on HWLOC* to identify the underlying topology of the system to optimize thread scheduling and memory allocation.
+
+Without HWLOC*, oneTBB may not take advantage of NUMA/Hybrid CPU support. Therefore, it's important to make sure that HWLOC* is installed before using oneTBB on such systems.
+
+Check HWLOC* on the System 
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+To check if HWLOC* is already installed on your system, run ``hwloc-ls``:
+
+   * For Linux* OS, in the command line. 
+   * For Windows* OS, in the command prompt.
+
+If HWLOC* is installed, the command displays information about the hardware topology of your system. 
+If it is not installed, you receive an error message saying that the command ``hwloc-ls`` could not be found.
+
+.. note:: For Hybrid CPU support, make sure that HWLOC* is version 2.5 or higher.
+          For NUMA support, install HWLOC* version 1.11 or higher. 
+
+Install HWLOC*
+^^^^^^^^^^^^^^
+
+To install HWLOC*, visit the official `Portable Hardware Locality website <https://www-lb.open-mpi.org/projects/hwloc/>`__.
+
+* For Windows* OS, binaries are available for download. 
+* For Linux* OS, only the source code is provided, so you need to build the binaries yourself.
+
+On Linux* OS, HWLOC* can also be installed with a package manager, such as APT* or YUM*.
+For example, with APT*, run: ``sudo apt install hwloc``.
+
+
+.. note:: For Hybrid CPU support, make sure that HWLOC* is version 2.5 or higher.
+          For NUMA support, install HWLOC* version 1.11 or higher.
diff --git a/doc/conf.py b/doc/conf.py
index 4c7e2b8a4b..39a5ca90c9 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -29,7 +29,7 @@
     project = u'Intel® oneAPI Threading Building Blocks (oneTBB)'
 else:
     project = u'oneTBB'
-copyright = u'2022, Intel Corporation'
+copyright = u'2023, Intel Corporation'
 author = u'Intel'
 
 # The short X.Y version
diff --git a/doc/main/reference/reference.rst b/doc/main/reference/reference.rst
index 9c8bca526a..87d05c3267 100644
--- a/doc/main/reference/reference.rst
+++ b/doc/main/reference/reference.rst
@@ -19,6 +19,7 @@ It also describes features that are not included in the oneTBB specification.
     info_namespace
     parallel_for_each_semantics
     parallel_sort_ranges_extension
+    scalable_memory_pools/malloc_replacement_log
 
 Preview features
 ****************
diff --git a/doc/main/reference/scalable_memory_pools.rst b/doc/main/reference/scalable_memory_pools.rst
index 6cf81139b7..d04cd54a90 100644
--- a/doc/main/reference/scalable_memory_pools.rst
+++ b/doc/main/reference/scalable_memory_pools.rst
@@ -41,3 +41,4 @@ Here, ``P`` represents an instance of the memory pool class.
     scalable_memory_pools/memory_pool_cls
     scalable_memory_pools/fixed_pool_cls
     scalable_memory_pools/memory_pool_allocator_cls
+    
diff --git a/doc/main/reference/scalable_memory_pools/malloc_replacement_log.rst b/doc/main/reference/scalable_memory_pools/malloc_replacement_log.rst
new file mode 100644
index 0000000000..8fea89f949
--- /dev/null
+++ b/doc/main/reference/scalable_memory_pools/malloc_replacement_log.rst
@@ -0,0 +1,84 @@
+.. _malloc_replacement_log:
+
+TBB_malloc_replacement_log Function
+===================================
+
+.. note:: This function is for Windows* OS only. 
+
+Summary
+*******
+
+Provides information about the status of dynamic memory allocation replacement.
+
+Syntax
+*******
+
+:: 
+
+   extern "C" int TBB_malloc_replacement_log(char *** log_ptr);
+
+
+Header
+******
+
+::
+
+   #include "oneapi/tbb/tbbmalloc_proxy.h"
+
+
+Description
+***********
+
+Dynamic replacement of memory allocation functions on Windows* OS uses in-memory binary instrumentation techniques. 
+To make sure that such instrumentation is safe, oneTBB first searches for a subset of replaced functions in the Visual C++* runtime DLLs
+and checks if each one has a known bytecode pattern. If any required function is not found or its bytecode pattern is unknown, the replacement is skipped, 
+and the program continues to use the standard memory allocation functions.
+
+The ``TBB_malloc_replacement_log`` function allows the program to check if the dynamic memory replacement happens and to get a log of the performed checks.
+
+**Returns:**
+
+* ``0``, if all necessary functions are successfully found and the replacement takes place.
+* ``-1``, otherwise.
+
+The ``log_ptr`` parameter must be the address of a ``char**`` variable or ``NULL``. If it is not ``NULL``, the function stores in that variable the address of an array of
+NULL-terminated strings containing detailed information about the searched functions in the following format:
+
+::
+
+   search_status: function_name (dll_name), byte pattern: <bytecodes>
+
+ 
+For more information about the replacement of dynamic memory allocation functions, see :ref:`Windows_C_Dynamic_Memory_Interface_Replacement`. 
+
+
+Example 
+*******
+
+::
+
+   #include "oneapi/tbb/tbbmalloc_proxy.h"
+   #include <stdio.h>
+
+   int main(){
+       char **func_replacement_log;
+       int func_replacement_status = TBB_malloc_replacement_log(&func_replacement_log);
+
+       if (func_replacement_status != 0) {
+           printf("tbbmalloc_proxy cannot replace memory allocation routines\n");
+           for (char** log_string = func_replacement_log; *log_string != 0; log_string++) {
+               printf("%s\n",*log_string);
+           }
+       }
+
+       return 0;
+   }
+
+
+Example output:
+
+:: 
+
+   tbbmalloc_proxy cannot replace memory allocation routines
+   Success: free (ucrtbase.dll), byte pattern: <C7442410000000008B4424>
+   Fail: _msize (ucrtbase.dll), byte pattern: <E90B000000CCCCCCCCCCCC>
diff --git a/doc/main/tbb_userguide/Floating_Point_Settings.rst b/doc/main/tbb_userguide/Floating_Point_Settings.rst
new file mode 100644
index 0000000000..4618f56ae5
--- /dev/null
+++ b/doc/main/tbb_userguide/Floating_Point_Settings.rst
@@ -0,0 +1,60 @@
+.. _Floating_Point_Settings:
+
+Floating-point Settings
+=======================
+
+To propagate CPU-specific settings for floating-point computations to tasks executed by the task scheduler, you can use one of the following two methods:
+
+* When a ``task_arena`` or a task scheduler for a given application thread is initialized, it captures the current floating-point settings of the thread.
+* The ``task_group_context`` class has a method to capture the current floating-point settings. 
+
+By default, worker threads use floating-point settings obtained during the initialization of a ``task_arena`` or the implicit arena of the application thread. The settings are applied to all computations within that ``task_arena`` or started by that application thread.
+
+
+For better control over floating-point behavior, a thread may capture the current settings in a task group context. To do so, pass a special flag to the constructor when you create the context:
+
+::
+    
+    task_group_context ctx( task_group_context::isolated,
+                        task_group_context::default_traits | task_group_context::fp_settings );
+
+
+Or call the ``capture_fp_settings`` method:
+
+::
+    
+    task_group_context ctx;
+    ctx.capture_fp_settings();
+
+
+You can then pass the task group context to most parallel algorithms, including ``flow::graph``, to ensure that all tasks related to this algorithm use the specified floating-point settings. 
+It is possible to execute the parallel algorithms with different floating-point settings captured to separate contexts, even at the same time.
+
+Floating-point settings captured to a task group context prevail over the settings captured during task scheduler initialization. This means that if a context is passed to a parallel algorithm, the floating-point settings captured to the context are used.
+Otherwise, if floating-point settings are not captured to the context, or a context is not explicitly specified, the settings captured during the task arena initialization are used.
+
+In a nested call to a parallel algorithm that does not use the context of a task group with explicitly captured floating-point settings, the outer-level settings are used. 
+If none of the outer-level contexts capture floating-point settings, the settings captured during task arena initialization are used.
+
+This behavior guarantees that:
+
+* Floating-point settings are applied to all tasks executed within a task arena, if they are captured: 
+
+  * To a task group context. 
+  * During the arena initialization. 
+
+* A call to a oneTBB parallel algorithm does not change the floating-point settings of the calling thread, even if the algorithm uses different settings.
+
+.. note:: 
+    The guarantees above apply only under the following conditions:
+
+    * User code inside a task either:
+
+      * Does not change the floating-point settings, or
+      * Reverts any modifications and restores the previous settings
+        before the end of the task.
+
+    * oneTBB task scheduler observers are not used to set or modify floating-point settings.
+
+    Otherwise, the stated guarantees are not valid and the behavior related to floating-point settings is undefined.
+
diff --git a/doc/main/tbb_userguide/Working_on_the_Assembly_Line_pipeline.rst b/doc/main/tbb_userguide/Working_on_the_Assembly_Line_pipeline.rst
index 939f713cd3..05786fbd82 100644
--- a/doc/main/tbb_userguide/Working_on_the_Assembly_Line_pipeline.rst
+++ b/doc/main/tbb_userguide/Working_on_the_Assembly_Line_pipeline.rst
@@ -172,13 +172,13 @@ equivalent version of the previous example that does this follows:
 
 
    void RunPipeline( int ntoken, FILE* input_file, FILE* output_file ) {
-       oneapi::tbb::filter_mode<void,TextSlice*> f1( oneapi::tbb::filter_mode::serial_in_order, 
+       oneapi::tbb::filter<void,TextSlice*> f1( oneapi::tbb::filter_mode::serial_in_order, 
                                           MyInputFunc(input_file) );
-       oneapi::tbb::filter_mode<TextSlice*,TextSlice*> f2(oneapi::tbb::filter_mode::parallel, 
+       oneapi::tbb::filter<TextSlice*,TextSlice*> f2(oneapi::tbb::filter_mode::parallel, 
                                                MyTransformFunc() );
-       oneapi::tbb::filter_mode<TextSlice*,void> f3(oneapi::tbb::filter_mode::serial_in_order, 
+       oneapi::tbb::filter<TextSlice*,void> f3(oneapi::tbb::filter_mode::serial_in_order, 
                                          MyOutputFunc(output_file) );
-       oneapi::tbb::filter_mode<void,void> f = f1 & f2 & f3;
+       oneapi::tbb::filter<void,void> f = f1 & f2 & f3;
        oneapi::tbb::parallel_pipeline(ntoken,f);
    }
 
diff --git a/doc/main/tbb_userguide/title.rst b/doc/main/tbb_userguide/title.rst
index c57cf2f6c2..b51c3294b8 100644
--- a/doc/main/tbb_userguide/title.rst
+++ b/doc/main/tbb_userguide/title.rst
@@ -14,6 +14,7 @@
    ../tbb_userguide/Flow_Graph
    ../tbb_userguide/work_isolation
    ../tbb_userguide/Exceptions_and_Cancellation
+   ../tbb_userguide/Floating_Point_Settings
    ../tbb_userguide/Containers
    ../tbb_userguide/Mutual_Exclusion
    ../tbb_userguide/Timing