Skip to content

Commit

Permalink
fix orthogonalize
Browse files Browse the repository at this point in the history
  • Loading branch information
ameli committed Dec 17, 2023
1 parent ca3e464 commit e0898a4
Show file tree
Hide file tree
Showing 13 changed files with 60 additions and 148 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/build-linux.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
name: build-linux

on:
# push:
# branches:
# - main
push:
branches:
- main
release:
types:
- published
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/build-macos.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
name: build-macos

on:
# push:
# branches:
# - main
push:
branches:
- main
release:
types:
- published
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/build-windows.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
name: build-windows

on:
# push:
# branches:
# - main
push:
branches:
- main
release:
types:
- published
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/deploy-conda.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
name: deploy-conda

on:
# push:
# branches:
# - main
push:
branches:
- main
release:
types:
- published
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/deploy-docs.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
name: deploy-docs

on:
# push:
# branches:
# - main
push:
branches:
- main
pull_request:
branches:
- main
Expand Down
6 changes: 2 additions & 4 deletions .github/workflows/deploy-pypi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@ jobs:
strategy:
fail-fast: false
matrix:
# os: [ubuntu-latest, windows-latest, macos-latest] # TEST
os: [ubuntu-latest] # TEST
os: [ubuntu-latest, windows-latest, macos-latest]
steps:
- name: Checkout
uses: actions/checkout@v4
Expand Down Expand Up @@ -131,8 +130,7 @@ jobs:
# CIBW_BEFORE_ALL_LINUX: chmod +x .github/scripts/install_cuda.sh && .github/scripts/install_cuda.sh
CIBW_ARCHS_LINUX: "x86_64"
CIBW_BUILD: "*-manylinux_x86_64"
# CIBW_SKIP: "pp37-* cp36-* cp37-* cp38-*" # TEST
CIBW_SKIP: "pp37-* cp*" # TEST
CIBW_SKIP: "pp* cp36-* cp37-* cp38-*"
CIBW_BUILD_VERBOSITY: 3
CIBW_ENVIRONMENT: "USE_LONG_INT=0 USE_UNSIGNED_LONG_INT=0 USE_CBLAS=0 USE_CUDA=1 CUDA_DYNAMIC_LOADING=1 CUDA_HOME=/usr/local/cuda"
CIBW_BEFORE_BUILD_LINUX: yum update; yum install gcc-gfortran openblas-devel.x86_64 lapack-devel.x86_64 -y;
Expand Down
2 changes: 1 addition & 1 deletion imate/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.21.5"
__version__ = "0.22.0"
49 changes: 12 additions & 37 deletions imate/_c_trace_estimator/c_orthogonalization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ void cOrthogonalization<DataType>::gram_schmidt_process(
/// difference is very small), they cannot be orthogonalized
/// against each other. In this case, one of the vectors is
/// re-generated by new random numbers.
///
///
/// \warning if \c num_vectors is larger than \c vector_size, the
/// orthogonalization fails since not all vectors are
/// independent, and at least one vector becomes zero.
Expand Down Expand Up @@ -279,9 +279,9 @@ void cOrthogonalization<DataType>::orthogonalize_vectors(

IndexType i = 0;
IndexType j;
IndexType start = 0;
IndexType start_j;
DataType inner_prod;
DataType norm;
DataType norm_j;
DataType norm_i;
DataType distance;
DataType epsilon = std::numeric_limits<DataType>::epsilon();
Expand All @@ -308,18 +308,22 @@ void cOrthogonalization<DataType>::orthogonalize_vectors(
if (static_cast<LongIndexType>(i) > vector_size)
{
// When vector_size is smaller than i, it is fine to cast to signed
start = i - static_cast<IndexType>(vector_size);
start_j = i - static_cast<IndexType>(vector_size);
}
else
{
start_j = 0;
}

// Reorthogonalize against previous vectors
for (j=start; j < i; ++j)
for (j=start_j; j < i; ++j)
{
// Norm of the j-th vector
norm = cVectorOperations<DataType>::euclidean_norm(
norm_j = cVectorOperations<DataType>::euclidean_norm(
&vectors[j*vector_size], vector_size);

// Check norm
if (norm < epsilon * sqrt(vector_size))
if (norm_j < epsilon * sqrt(vector_size))
{
std::cerr << "WARNING: norm of the given vector is too " \
<< " small. Cannot reorthogonalize against zero" \
Expand All @@ -334,36 +338,7 @@ void cOrthogonalization<DataType>::orthogonalize_vectors(
vector_size);

// Scale of subtraction
DataType scale = inner_prod / (norm * norm);

// If scale is 1, it is possible that i-th and j-th vectors are
// identical (or close). So, instead of subtracting them,
// regenerate new i-th vector.
if (std::abs(scale - 1.0) <= 2.0 * epsilon)
{
// Norm of the i-th vector
norm_i = cVectorOperations<DataType>::euclidean_norm(
&vectors[i*vector_size], vector_size);

// Compute distance between i-th and j-th vector
distance = sqrt(norm_i*norm_i - 2.0*inner_prod + norm*norm);

// If distance is zero, do not reorthogonalize i-th against
// vector j-th and the subsequent vectors after j-th.
if (distance < 2.0 * epsilon * sqrt(vector_size))
{
// Regenerate new random vector for i-th vector
RandomArrayGenerator<DataType>::generate_random_array(
random_number_generator, &vectors[i*vector_size],
vector_size, num_threads);

// Repeat the reorthogonalization for i-th vector against
// all previous vectors again.
success = 0;
++num_trials;
break;
}
}
DataType scale = inner_prod / (norm_j * norm_j);

// Subtraction
cVectorOperations<DataType>::subtract_scaled_vector(
Expand Down
57 changes: 11 additions & 46 deletions imate/_cu_trace_estimator/cu_orthogonalization.cu
Original file line number Diff line number Diff line change
Expand Up @@ -286,9 +286,9 @@ void cuOrthogonalization<DataType>::orthogonalize_vectors(

IndexType i = 0;
IndexType j;
IndexType start = 0;
IndexType start_j;
DataType inner_prod;
DataType norm;
DataType norm_j;
DataType norm_i;
DataType distance;
DataType epsilon = std::numeric_limits<DataType>::epsilon();
Expand Down Expand Up @@ -316,18 +316,22 @@ void cuOrthogonalization<DataType>::orthogonalize_vectors(
if (static_cast<LongIndexType>(i) > vector_size)
{
// When vector_size is smaller than i, it is fine to cast to signed
start = i - static_cast<IndexType>(vector_size);
start_j = i - static_cast<IndexType>(vector_size);
}
else
{
start_j = 0;
}

// Reorthogonalize against previous vectors
for (j=start; j < i; ++j)
for (j=start_j; j < i; ++j)
{
// Norm of the j-th vector
norm = cuVectorOperations<DataType>::euclidean_norm(
norm_j = cuVectorOperations<DataType>::euclidean_norm(
cublas_handle, &vectors[j*vector_size], vector_size);

// Check norm
if (norm < epsilon * sqrt(vector_size))
if (norm_j < epsilon * sqrt(vector_size))
{
std::cerr << "WARNING: norm of the given vector is too " \
<< " small. Cannot reorthogonalize against zero" \
Expand All @@ -342,46 +346,7 @@ void cuOrthogonalization<DataType>::orthogonalize_vectors(
&vectors[j*vector_size], vector_size);

// Scale of subtraction
DataType scale = inner_prod / (norm * norm);

// If scale is 1, it is possible that i-th and j-th vectors are
// identical (or close). So, instead of subtracting them,
// regenerate new i-th vector.
if (std::abs(scale - 1.0) <= 2.0 * epsilon)
{
// Norm of the i-th vector
norm_i = cuVectorOperations<DataType>::euclidean_norm(
cublas_handle, &vectors[i*vector_size], vector_size);

// Compute distance between i-th and j-th vector
distance = sqrt(norm_i*norm_i - 2.0*inner_prod + norm*norm);

// If distance is zero, do not reorthogonalize i-th against
// vector j-th and the subsequent vectors after j-th.
if (distance < 2.0 * epsilon * sqrt(vector_size))
{
// Allocate buffer
if (buffer == NULL)
{
buffer = new DataType[vector_size];
}

// Regenerate new random vector on buffer
RandomArrayGenerator<DataType>::generate_random_array(
random_number_generator, buffer,
vector_size, num_threads);

// Copy buffer to the i-th vector on device
CudaInterface<DataType>::copy_to_device(
buffer, vector_size, &vectors[i*vector_size]);

// Repeat the reorthogonalization for i-th vector against
// all previous vectors again.
success = 0;
++num_trials;
break;
}
}
DataType scale = inner_prod / (norm_j * norm_j);

// Subtraction
cuVectorOperations<DataType>::subtract_scaled_vector(
Expand Down
44 changes: 8 additions & 36 deletions imate/_linear_algebra/orthogonalization.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ from .._definitions.types cimport DataType, IndexType, LongIndexType, FlagType
from libc.math cimport fmax, fabs, sqrt
from libc.stdio cimport printf
from libc.stdlib cimport abort
from libcpp.limits cimport numeric_limits


# ====================
Expand Down Expand Up @@ -98,7 +99,7 @@ cdef void gram_schmidt_process(
cdef DataType norm
cdef DataType norm_v
cdef DataType scale
cdef DataType epsilon = 1e-15
cdef DataType epsilon = numeric_limits[DataType].epsilon()

# Determine how many previous vectors to orthogonalize against
if ortho_depth == 0:
Expand Down Expand Up @@ -216,9 +217,8 @@ cdef void orthogonalize_vectors(
cdef IndexType j
cdef IndexType start_j
cdef DataType inner_prod
cdef DataType norm
cdef DataType norm_j
cdef DataType norm_i
cdef DataType distance
cdef DataType scale
cdef DataType epsilon = 1e-15
cdef IndexType success = 1
Expand Down Expand Up @@ -246,11 +246,11 @@ cdef void orthogonalize_vectors(
for j in range(start_j, i):

# Norm of the j-th vector
norm = cVectorOperations[DataType].euclidean_norm(
norm_j = cVectorOperations[DataType].euclidean_norm(
&vectors[j*vector_size], vector_size)

# Check norm
if norm < epsilon * sqrt(vector_size):
if norm_j < epsilon * sqrt(vector_size):
printf('WARNING: norm of the given vector is too small. ')
printf('Cannot reorthogonalize against zero vector. ')
printf('Skipping.\n')
Expand All @@ -263,34 +263,7 @@ cdef void orthogonalize_vectors(
vector_size)

# Scale of subtraction
scale = inner_prod / (norm**2)

# If scale is 1, it is possible that i-th and j-th vectors are
# identical (or close). So, instead of subtracting them, regenerate
# a new i-th vector.
if fabs(scale - 1.0) <= 2.0 * epsilon:

# Norm of the i-th vector
norm_i = cVectorOperations[DataType].euclidean_norm(
&vectors[i*vector_size], vector_size)

# Compute distance between i-th and j-th vector
distance = sqrt(norm_i**2 - 2.0*inner_prod + norm**2)

# If distance is zero, do not reorthogonalize i-th against
# vector j-th and the subsequent vectors after j-th.
if distance < 2.0 * epsilon * sqrt(vector_size):

# Regenerate new random vector for i-th vector
with gil:
py_generate_random_array(&vectors[i*vector_size],
vector_size, num_threads)

# Repeat the reorthogonalization for i-th vector against
# all previous vectors again.
success = 0
num_trials += 1
break
scale = inner_prod / (norm_j**2)

# Subtraction
cVectorOperations[DataType].subtract_scaled_vector(
Expand All @@ -305,9 +278,8 @@ cdef void orthogonalize_vectors(
if norm_i < epsilon * sqrt(vector_size):

# Regenerate new random vector for i-th vector
with gil:
py_generate_random_array(&vectors[i*vector_size],
vector_size, num_threads)
py_generate_random_array(&vectors[i*vector_size],
vector_size, num_threads)

# Repeat the reorthogonalization for i-th vector against
# all previous vectors again.
Expand Down
2 changes: 1 addition & 1 deletion imate/_random_generator/py_random_array_generator.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,4 @@ from .._definitions.types cimport DataType, LongIndexType, IndexType
cdef void py_generate_random_array(
DataType* array,
const LongIndexType array_size,
const IndexType num_threads) except *
const IndexType num_threads) nogil
14 changes: 8 additions & 6 deletions imate/_random_generator/py_random_array_generator.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from .._definitions.types cimport DataType, LongIndexType, IndexType
from .random_array_generator cimport RandomArrayGenerator
from .random_number_generator cimport RandomNumberGenerator
from .py_random_number_generator cimport pyRandomNumberGenerator
# from .py_random_number_generator cimport pyRandomNumberGenerator


# =====================
Expand All @@ -24,7 +24,7 @@ from .py_random_number_generator cimport pyRandomNumberGenerator
cdef void py_generate_random_array(
DataType* array,
const LongIndexType array_size,
const IndexType num_threads) except *:
const IndexType num_threads) nogil:
"""
A python wrapper for ``RandomArrayGenerator.generate_random_array()``.
Expand All @@ -38,11 +38,13 @@ cdef void py_generate_random_array(
:type num_threads: int
"""

# Get the C object that is wrapped by py_random_number_generator
cdef pyRandomNumberGenerator py_random_number_generator = \
pyRandomNumberGenerator(num_threads)
# Create a random number generator object
cdef RandomNumberGenerator* random_number_generator = \
py_random_number_generator.get_random_number_generator()
new RandomNumberGenerator(num_threads)

# Pass the random number generator to an array generator
RandomArrayGenerator[DataType].generate_random_array(
random_number_generator[0], array, array_size, num_threads)

# Delete object
del random_number_generator
Loading

0 comments on commit e0898a4

Please sign in to comment.