-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
191 additions
and
25 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,6 +16,14 @@ jobs: | |
fail-fast: false | ||
matrix: | ||
os: [ubuntu-latest, windows-latest, macos-latest] | ||
python-version: ['cp39', 'cp310', 'cp311', 'cp312'] | ||
# include: | ||
# - os: ubuntu-latest | ||
# python-version: 'pp38' | ||
# - os: ubuntu-latest | ||
# python-version: 'pp39' | ||
# - os: ubuntu-latest | ||
# python-version: 'pp310' | ||
steps: | ||
- name: Checkout | ||
uses: actions/checkout@v4 | ||
|
@@ -78,12 +86,12 @@ jobs: | |
# does not need manylinux docker; thus, CUDA can be installed in | ||
# the github action's virtual machine using the Jimver's action | ||
# in the previous action (see above). | ||
- name: Build wheels in windows with CUDA support (X86_64) | ||
- name: Build wheels in windows with CUDA (X86_64) Python ${{ matrix.python-version }} | ||
if: matrix.os == 'windows-latest' | ||
uses: pypa/[email protected] | ||
env: | ||
CIBW_ARCHS_WINDOWS: "AMD64 x86" | ||
CIBW_BUILD: "*-win_amd64" | ||
# Quote the whole value: with the quotes placed after the expression, the
# double-quote characters become part of the build selector.
CIBW_BUILD: "${{ matrix.python-version }}-win_amd64"
CIBW_SKIP: "pp* cp36-* cp37-* cp38-*" | ||
CIBW_BUILD_VERBOSITY: 3 | ||
CIBW_ENVIRONMENT: "USE_LONG_INT=0 USE_UNSIGNED_LONG_INT=0 USE_CBLAS=0 USE_CUDA=1 CUDA_DYNAMIC_LOADING=1" | ||
|
@@ -95,12 +103,12 @@ jobs: | |
# Do not enable compiling with CUDA for macOS since NVIDIA no longer supports CUDA on macOS. | ||
# Build wheel for macos X86_64. | ||
# Note that wheels for macos ARM64 will be built on cirrus ci (see /tools/ci) | ||
- name: Build wheels in mac without CUDA support (X86_64) | ||
- name: Build wheels in mac without CUDA (X86_64) Python ${{ matrix.python-version }} | ||
if: matrix.os == 'macos-latest' | ||
uses: pypa/[email protected] | ||
env: | ||
CIBW_ARCHS_MACOS: "x86_64" | ||
CIBW_BUILD: "*-macosx_x86_64" | ||
# Quote the whole value: with the quotes placed after the expression, the
# double-quote characters become part of the build selector.
CIBW_BUILD: "${{ matrix.python-version }}-macosx_x86_64"
CIBW_SKIP: "pp* cp36-* cp37-* cp38-*" | ||
CIBW_BUILD_VERBOSITY: 3 | ||
CIBW_ENVIRONMENT: "USE_LONG_INT=0 USE_UNSIGNED_LONG_INT=0 USE_CBLAS=0 USE_CUDA=0" | ||
|
@@ -121,15 +129,15 @@ jobs: | |
# uploaded to pypi (pypi has 100MB upload limit). On the down side, | ||
# the user has to install cuda library themselves. | ||
# Note that wheels for linux AARCH64 will be built on cirrus ci (see /tools/ci) | ||
- name: Build wheels in linux with CUDA support (X86_64) | ||
- name: Build wheels in linux with CUDA (X86_64) Python ${{ matrix.python-version }}
if: matrix.os == 'ubuntu-latest' | ||
uses: pypa/[email protected] | ||
env: | ||
CIBW_MANYLINUX_X86_64_IMAGE: sameli/manylinux2014_x86_64_cuda_12.2 | ||
# CIBW_MANYLINUX_X86_64_IMAGE: manylinux2014 | ||
# CIBW_BEFORE_ALL_LINUX: chmod +x .github/scripts/install_cuda.sh && .github/scripts/install_cuda.sh | ||
CIBW_ARCHS_LINUX: "x86_64" | ||
CIBW_BUILD: "*-manylinux_x86_64" | ||
# Quote the whole value: with the quotes placed after the expression, the
# double-quote characters become part of the build selector.
CIBW_BUILD: "${{ matrix.python-version }}-manylinux_x86_64"
CIBW_SKIP: "pp* cp36-* cp37-* cp38-*" | ||
CIBW_BUILD_VERBOSITY: 3 | ||
CIBW_ENVIRONMENT: "USE_LONG_INT=0 USE_UNSIGNED_LONG_INT=0 USE_CBLAS=0 USE_CUDA=1 CUDA_DYNAMIC_LOADING=1 CUDA_HOME=/usr/local/cuda" | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,141 @@ | ||
/* | ||
* SPDX-FileCopyrightText: Copyright 2021, Siavash Ameli <[email protected]> | ||
* SPDX-License-Identifier: BSD-3-Clause | ||
* SPDX-FileType: SOURCE | ||
* | ||
* This program is free software: you can redistribute it and/or modify it | ||
* under the terms of the license found in the LICENSE.txt file in the root | ||
* directory of this source tree. | ||
*/ | ||
|
||
|
||
// ======= | ||
// Headers | ||
// ======= | ||
|
||
#include "./highres_time_stamp.h" | ||
#include "unistd.h" // uint64_t | ||
|
||
// Select the time source at compile time. After this section, at most one of
// the following macros is defined:
//
//   USE_QUERY_PERFORMANCE_COUNTER   Windows (QueryPerformanceCounter).
//   USE_CLOCK_GETTIME               Systems that advertise _POSIX_TIMERS, or
//                                   macOS with a deployment target >= 10.12.
//
// If neither macro is defined, the code falls back to std::time and std::clock
// and <ctime> is included for that purpose.
#if defined(WIN32) || defined(_WIN32) || defined(__WIN32__)

    // Use Windows API
    #include <windows.h>  // LARGE_INTEGER, QueryPerformanceCounter
    #define USE_QUERY_PERFORMANCE_COUNTER

#elif defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0)

    // POSIX timers compliant (Linux)
    #include <time.h>  // timespec, clock_gettime
    #define USE_CLOCK_GETTIME

#elif defined(__APPLE__)

    // MacOS version 10.12 and above has clock_gettime, but _POSIX_TIMERS is
    // not defined. So, the availability of this function is derived from the
    // minimum required macOS version instead.
    #include <TargetConditionals.h>
    #if TARGET_OS_MAC
        #include <Availability.h>
        #if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__)
            #if __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 101200

                // macOS Sierra (10.12) or later. Use clock_gettime
                #include <time.h>  // timespec, clock_gettime
                #define USE_CLOCK_GETTIME

                // Define CLOCK_REALTIME in case it is not defined
                #if !defined(CLOCK_REALTIME)
                    #define CLOCK_REALTIME 0
                #endif

            #endif
        #endif
    #endif

#endif

// Use neither clock_gettime nor QueryPerformanceCounter. Rather, fall back to
// std functions. This check is made after the platform chain (and not in its
// final #else branch) so that a platform which entered one of the branches
// above without selecting a time source, such as macOS older than 10.12,
// still gets the declarations of std::time and std::clock. Note that
// std::clock has very low resolution in windows.
#if !defined(USE_QUERY_PERFORMANCE_COUNTER) && !defined(USE_CLOCK_GETTIME)
    #include <ctime>  // std::time, std::clock
#endif
|
||
|
||
// ====================== | ||
// Get HighRes Time Stamp | ||
// ====================== | ||
|
||
/// Returns a high-resolution time stamp to be used to seed a random
/// generating function (see split_mix_64.cpp). The purpose of a
/// high-resolution counter is that if the random-generating function is called
/// subsequently in very short intervals (about a nanosecond), the seed values
/// should still be distinct.
///
/// The time source is selected at compile time:
///
/// - If USE_CLOCK_GETTIME is defined, POSIX ``clock_gettime`` is called with
///   ``CLOCK_REALTIME`` and the result is the number of nanoseconds
///   ``tv_sec * 10^9 + tv_nsec``. The sum is evaluated with 64-bit integer
///   arithmetic; a double cannot represent a present-day nanosecond count
///   exactly, so floating-point arithmetic would round away the low-order
///   digits that make consecutive calls distinct.
///
/// - If USE_QUERY_PERFORMANCE_COUNTER is defined (Windows), the raw tick count
///   of ``QueryPerformanceCounter`` is returned. ``std::clock()`` is avoided
///   there because of its low resolution on Windows.
///
/// - Otherwise, the function falls back to the sum of ``std::time(0)`` and
///   ``std::clock()``.
///
/// Note that this function works without C++11.
///
/// An alternative approach is to use ``rdtsc()``, but its use is discouraged,
/// since it is far less portable and many processors (like the ARM64
/// architecture) do not support it. Unlike rdtsc, this function works on both
/// X86_64 and ARM64.
///
/// \return  The time stamp. The value ``1`` is returned when the selected
///          system call reports a failure; the program continues without
///          raising an error in that case.

uint64_t get_highres_time_stamp(void)
{
    // Zero means no proper function found for this OS and processor to
    // support high-precision time counting.
    uint64_t time_stamp = 0;

    #if defined(USE_CLOCK_GETTIME)

        // Using POSIX clock_gettime
        struct timespec now;
        if (clock_gettime(CLOCK_REALTIME, &now) == -1)
        {
            // One means there is an error, but the program continues without
            // raising an error. The timespec is not read in this case, since
            // its content is not set on failure.
            time_stamp = 1;
        }
        else
        {
            // Integer arithmetic keeps the full nanosecond resolution.
            const uint64_t nanoseconds_per_second = 1000000000;
            time_stamp =
                static_cast<uint64_t>(now.tv_sec) * nanoseconds_per_second +
                static_cast<uint64_t>(now.tv_nsec);
        }

    #elif defined(USE_QUERY_PERFORMANCE_COUNTER)

        // Using Windows API for query performance counter
        LARGE_INTEGER ticks;
        if (!QueryPerformanceCounter(&ticks))
        {
            // One means there is an error (see above).
            time_stamp = 1;
        }
        else
        {
            time_stamp = static_cast<uint64_t>(ticks.QuadPart);
        }

    #else

        // Use std::time and std::clock.
        // Note that std::time has low resolution (second accuracy). To
        // improve it, we add std::clock, which has high resolution (in UNIX
        // only). In UNIX, std::clock is the process time with high
        // resolution. In Windows, note well that std::clock is the wall time
        // with "second" resolution, and should be avoided.
        time_stamp = static_cast<uint64_t>(std::time(0)) +
                     static_cast<uint64_t>(std::clock());

    #endif

    return time_stamp;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
/* | ||
* SPDX-FileCopyrightText: Copyright 2021, Siavash Ameli <[email protected]> | ||
* SPDX-License-Identifier: BSD-3-Clause | ||
* SPDX-FileType: SOURCE | ||
* | ||
* This program is free software: you can redistribute it and/or modify it | ||
* under the terms of the license found in the LICENSE.txt file in the root | ||
* directory of this source tree. | ||
*/ | ||
|
||
|
||
// Note: the guard name does not start with an underscore followed by an
// uppercase letter, since such identifiers are reserved for the
// implementation.
#ifndef RANDOM_GENERATOR_HIGHRES_TIME_STAMP_H_
#define RANDOM_GENERATOR_HIGHRES_TIME_STAMP_H_


// ======
// Header
// ======

#include <stdint.h>  // uint64_t


// ============
// Declarations
// ============

/// Get HighRes Time Stamp
///
/// Returns a 64-bit time stamp intended to seed a random number generator.
/// The unit of the returned value depends on the time source selected at
/// compile time in the implementation file (nanoseconds from clock_gettime,
/// ticks from QueryPerformanceCounter, or the sum of std::time and
/// std::clock otherwise).
uint64_t get_highres_time_stamp(void);


#endif  // RANDOM_GENERATOR_HIGHRES_TIME_STAMP_H_
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters