Skip to content

Commit

Permalink
fix orthogonalize
Browse files Browse the repository at this point in the history
  • Loading branch information
ameli committed Dec 17, 2023
1 parent ca3e464 commit e0898a4
Show file tree
Hide file tree
Showing 13 changed files with 60 additions and 148 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/build-linux.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
name: build-linux

on:
# push:
# branches:
# - main
push:
branches:
- main
release:
types:
- published
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/build-macos.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
name: build-macos

on:
# push:
# branches:
# - main
push:
branches:
- main
release:
types:
- published
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/build-windows.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
name: build-windows

on:
# push:
# branches:
# - main
push:
branches:
- main
release:
types:
- published
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/deploy-conda.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
name: deploy-conda

on:
# push:
# branches:
# - main
push:
branches:
- main
release:
types:
- published
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/deploy-docs.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
name: deploy-docs

on:
# push:
# branches:
# - main
push:
branches:
- main
pull_request:
branches:
- main
Expand Down
6 changes: 2 additions & 4 deletions .github/workflows/deploy-pypi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@ jobs:
strategy:
fail-fast: false
matrix:
# os: [ubuntu-latest, windows-latest, macos-latest] # TEST
os: [ubuntu-latest] # TEST
os: [ubuntu-latest, windows-latest, macos-latest]
steps:
- name: Checkout
uses: actions/checkout@v4
Expand Down Expand Up @@ -131,8 +130,7 @@ jobs:
# CIBW_BEFORE_ALL_LINUX: chmod +x .github/scripts/install_cuda.sh && .github/scripts/install_cuda.sh
CIBW_ARCHS_LINUX: "x86_64"
CIBW_BUILD: "*-manylinux_x86_64"
# CIBW_SKIP: "pp37-* cp36-* cp37-* cp38-*" # TEST
CIBW_SKIP: "pp37-* cp*" # TEST
CIBW_SKIP: "pp* cp36-* cp37-* cp38-*"
CIBW_BUILD_VERBOSITY: 3
CIBW_ENVIRONMENT: "USE_LONG_INT=0 USE_UNSIGNED_LONG_INT=0 USE_CBLAS=0 USE_CUDA=1 CUDA_DYNAMIC_LOADING=1 CUDA_HOME=/usr/local/cuda"
CIBW_BEFORE_BUILD_LINUX: yum update; yum install gcc-gfortran openblas-devel.x86_64 lapack-devel.x86_64 -y;
Expand Down
2 changes: 1 addition & 1 deletion imate/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.21.5"
__version__ = "0.22.0"
49 changes: 12 additions & 37 deletions imate/_c_trace_estimator/c_orthogonalization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ void cOrthogonalization<DataType>::gram_schmidt_process(
/// difference is very small), they cannot be orthogonalized
/// against each other. In this case, one of the vectors is
/// re-generated by new random numbers.
///
///
/// \warning if \c num_vectors is larger than \c vector_size, the
/// orthogonalization fails since not all vectors are
/// independent, and at least one vector becomes zero.
Expand Down Expand Up @@ -279,9 +279,9 @@ void cOrthogonalization<DataType>::orthogonalize_vectors(

IndexType i = 0;
IndexType j;
IndexType start = 0;
IndexType start_j;
DataType inner_prod;
DataType norm;
DataType norm_j;
DataType norm_i;
DataType distance;
DataType epsilon = std::numeric_limits<DataType>::epsilon();
Expand All @@ -308,18 +308,22 @@ void cOrthogonalization<DataType>::orthogonalize_vectors(
if (static_cast<LongIndexType>(i) > vector_size)
{
// When vector_size is smaller than i, it is fine to cast to signed
start = i - static_cast<IndexType>(vector_size);
start_j = i - static_cast<IndexType>(vector_size);
}
else
{
start_j = 0;
}

// Reorthogonalize against previous vectors
for (j=start; j < i; ++j)
for (j=start_j; j < i; ++j)
{
// Norm of the j-th vector
norm = cVectorOperations<DataType>::euclidean_norm(
norm_j = cVectorOperations<DataType>::euclidean_norm(
&vectors[j*vector_size], vector_size);

// Check norm
if (norm < epsilon * sqrt(vector_size))
if (norm_j < epsilon * sqrt(vector_size))
{
std::cerr << "WARNING: norm of the given vector is too " \
<< " small. Cannot reorthogonalize against zero" \
Expand All @@ -334,36 +338,7 @@ void cOrthogonalization<DataType>::orthogonalize_vectors(
vector_size);

// Scale of subtraction
DataType scale = inner_prod / (norm * norm);

// If scale is 1, it is possible that i-th and j-th vectors are
// identical (or close). So, instead of subtracting them,
// regenerate new i-th vector.
if (std::abs(scale - 1.0) <= 2.0 * epsilon)
{
// Norm of the i-th vector
norm_i = cVectorOperations<DataType>::euclidean_norm(
&vectors[i*vector_size], vector_size);

// Compute distance between i-th and j-th vector
distance = sqrt(norm_i*norm_i - 2.0*inner_prod + norm*norm);

// If distance is zero, do not reorthogonalize i-th against
// vector j-th and the subsequent vectors after j-th.
if (distance < 2.0 * epsilon * sqrt(vector_size))
{
// Regenerate new random vector for i-th vector
RandomArrayGenerator<DataType>::generate_random_array(
random_number_generator, &vectors[i*vector_size],
vector_size, num_threads);

// Repeat the reorthogonalization for i-th vector against
// all previous vectors again.
success = 0;
++num_trials;
break;
}
}
DataType scale = inner_prod / (norm_j * norm_j);

// Subtraction
cVectorOperations<DataType>::subtract_scaled_vector(
Expand Down
57 changes: 11 additions & 46 deletions imate/_cu_trace_estimator/cu_orthogonalization.cu
Original file line number Diff line number Diff line change
Expand Up @@ -286,9 +286,9 @@ void cuOrthogonalization<DataType>::orthogonalize_vectors(

IndexType i = 0;
IndexType j;
IndexType start = 0;
IndexType start_j;
DataType inner_prod;
DataType norm;
DataType norm_j;
DataType norm_i;
DataType distance;
DataType epsilon = std::numeric_limits<DataType>::epsilon();
Expand Down Expand Up @@ -316,18 +316,22 @@ void cuOrthogonalization<DataType>::orthogonalize_vectors(
if (static_cast<LongIndexType>(i) > vector_size)
{
// When vector_size is smaller than i, it is fine to cast to signed
start = i - static_cast<IndexType>(vector_size);
start_j = i - static_cast<IndexType>(vector_size);
}
else
{
start_j = 0;
}

// Reorthogonalize against previous vectors
for (j=start; j < i; ++j)
for (j=start_j; j < i; ++j)
{
// Norm of the j-th vector
norm = cuVectorOperations<DataType>::euclidean_norm(
norm_j = cuVectorOperations<DataType>::euclidean_norm(
cublas_handle, &vectors[j*vector_size], vector_size);

// Check norm
if (norm < epsilon * sqrt(vector_size))
if (norm_j < epsilon * sqrt(vector_size))
{
std::cerr << "WARNING: norm of the given vector is too " \
<< " small. Cannot reorthogonalize against zero" \
Expand All @@ -342,46 +346,7 @@ void cuOrthogonalization<DataType>::orthogonalize_vectors(
&vectors[j*vector_size], vector_size);

// Scale of subtraction
DataType scale = inner_prod / (norm * norm);

// If scale is 1, it is possible that i-th and j-th vectors are
// identical (or close). So, instead of subtracting them,
// regenerate new i-th vector.
if (std::abs(scale - 1.0) <= 2.0 * epsilon)
{
// Norm of the i-th vector
norm_i = cuVectorOperations<DataType>::euclidean_norm(
cublas_handle, &vectors[i*vector_size], vector_size);

// Compute distance between i-th and j-th vector
distance = sqrt(norm_i*norm_i - 2.0*inner_prod + norm*norm);

// If distance is zero, do not reorthogonalize i-th against
// vector j-th and the subsequent vectors after j-th.
if (distance < 2.0 * epsilon * sqrt(vector_size))
{
// Allocate buffer
if (buffer == NULL)
{
buffer = new DataType[vector_size];
}

// Regenerate new random vector on buffer
RandomArrayGenerator<DataType>::generate_random_array(
random_number_generator, buffer,
vector_size, num_threads);

// Copy buffer to the i-th vector on device
CudaInterface<DataType>::copy_to_device(
buffer, vector_size, &vectors[i*vector_size]);

// Repeat the reorthogonalization for i-th vector against
// all previous vectors again.
success = 0;
++num_trials;
break;
}
}
DataType scale = inner_prod / (norm_j * norm_j);

// Subtraction
cuVectorOperations<DataType>::subtract_scaled_vector(
Expand Down
44 changes: 8 additions & 36 deletions imate/_linear_algebra/orthogonalization.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ from .._definitions.types cimport DataType, IndexType, LongIndexType, FlagType
from libc.math cimport fmax, fabs, sqrt
from libc.stdio cimport printf
from libc.stdlib cimport abort
from libcpp.limits cimport numeric_limits


# ====================
Expand Down Expand Up @@ -98,7 +99,7 @@ cdef void gram_schmidt_process(
cdef DataType norm
cdef DataType norm_v
cdef DataType scale
cdef DataType epsilon = 1e-15
cdef DataType epsilon = numeric_limits[DataType].epsilon()

# Determine how many previous vectors to orthogonalize against
if ortho_depth == 0:
Expand Down Expand Up @@ -216,9 +217,8 @@ cdef void orthogonalize_vectors(
cdef IndexType j
cdef IndexType start_j
cdef DataType inner_prod
cdef DataType norm
cdef DataType norm_j
cdef DataType norm_i
cdef DataType distance
cdef DataType scale
cdef DataType epsilon = 1e-15
cdef IndexType success = 1
Expand Down Expand Up @@ -246,11 +246,11 @@ cdef void orthogonalize_vectors(
for j in range(start_j, i):

# Norm of the j-th vector
norm = cVectorOperations[DataType].euclidean_norm(
norm_j = cVectorOperations[DataType].euclidean_norm(
&vectors[j*vector_size], vector_size)

# Check norm
if norm < epsilon * sqrt(vector_size):
if norm_j < epsilon * sqrt(vector_size):
printf('WARNING: norm of the given vector is too small. ')
printf('Cannot reorthogonalize against zero vector. ')
printf('Skipping.\n')
Expand All @@ -263,34 +263,7 @@ cdef void orthogonalize_vectors(
vector_size)

# Scale of subtraction
scale = inner_prod / (norm**2)

# If scale is 1, it is possible that i-th and j-th vectors are
# identical (or close). So, instead of subtracting them, regenerate
# a new i-th vector.
if fabs(scale - 1.0) <= 2.0 * epsilon:

# Norm of the i-th vector
norm_i = cVectorOperations[DataType].euclidean_norm(
&vectors[i*vector_size], vector_size)

# Compute distance between i-th and j-th vector
distance = sqrt(norm_i**2 - 2.0*inner_prod + norm**2)

# If distance is zero, do not reorthogonalize i-th against
# vector j-th and the subsequent vectors after j-th.
if distance < 2.0 * epsilon * sqrt(vector_size):

# Regenerate new random vector for i-th vector
with gil:
py_generate_random_array(&vectors[i*vector_size],
vector_size, num_threads)

# Repeat the reorthogonalization for i-th vector against
# all previous vectors again.
success = 0
num_trials += 1
break
scale = inner_prod / (norm_j**2)

# Subtraction
cVectorOperations[DataType].subtract_scaled_vector(
Expand All @@ -305,9 +278,8 @@ cdef void orthogonalize_vectors(
if norm_i < epsilon * sqrt(vector_size):

# Regenerate new random vector for i-th vector
with gil:
py_generate_random_array(&vectors[i*vector_size],
vector_size, num_threads)
py_generate_random_array(&vectors[i*vector_size],
vector_size, num_threads)

# Repeat the reorthogonalization for i-th vector against
# all previous vectors again.
Expand Down
2 changes: 1 addition & 1 deletion imate/_random_generator/py_random_array_generator.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,4 @@ from .._definitions.types cimport DataType, LongIndexType, IndexType
cdef void py_generate_random_array(
DataType* array,
const LongIndexType array_size,
const IndexType num_threads) except *
const IndexType num_threads) nogil
14 changes: 8 additions & 6 deletions imate/_random_generator/py_random_array_generator.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from .._definitions.types cimport DataType, LongIndexType, IndexType
from .random_array_generator cimport RandomArrayGenerator
from .random_number_generator cimport RandomNumberGenerator
from .py_random_number_generator cimport pyRandomNumberGenerator
# from .py_random_number_generator cimport pyRandomNumberGenerator


# =====================
Expand All @@ -24,7 +24,7 @@ from .py_random_number_generator cimport pyRandomNumberGenerator
cdef void py_generate_random_array(
DataType* array,
const LongIndexType array_size,
const IndexType num_threads) except *:
const IndexType num_threads) nogil:
"""
A python wrapper for ``RandomArrayGenerator.generate_random_array()``.
Expand All @@ -38,11 +38,13 @@ cdef void py_generate_random_array(
:type num_threads: int
"""

# Get the C object that is wrapped by py_random_number_generator
cdef pyRandomNumberGenerator py_random_number_generator = \
pyRandomNumberGenerator(num_threads)
# Create a random number generator object
cdef RandomNumberGenerator* random_number_generator = \
py_random_number_generator.get_random_number_generator()
new RandomNumberGenerator(num_threads)

# Pass the random number generator to an array generator
RandomArrayGenerator[DataType].generate_random_array(
random_number_generator[0], array, array_size, num_threads)

# Delete object
del random_number_generator
Loading

0 comments on commit e0898a4

Please sign in to comment.