Skip to content

Commit

Permalink
highres timestamp
Browse files Browse the repository at this point in the history
  • Loading branch information
ameli committed Dec 20, 2023
1 parent 434bf2f commit 9f2753d
Show file tree
Hide file tree
Showing 5 changed files with 191 additions and 25 deletions.
20 changes: 14 additions & 6 deletions .github/workflows/deploy-pypi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,14 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
python-version: ['cp39', 'cp310', 'cp311', 'cp312']
# include:
# - os: ubuntu-latest
# python-version: 'pp38'
# - os: ubuntu-latest
# python-version: 'pp39'
# - os: ubuntu-latest
# python-version: 'pp310'
steps:
- name: Checkout
uses: actions/checkout@v4
Expand Down Expand Up @@ -78,12 +86,12 @@ jobs:
# does not need manylinux docker; thus, CUDA can be installed in
# the github action's virtual machine using the Jimver's action
# in the previous action (see above).
- name: Build wheels in windows with CUDA support (X86_64)
- name: Build wheels in windows with CUDA (X86_64) Python ${{ matrix.python-version }}
if: matrix.os == 'windows-latest'
uses: pypa/[email protected]
env:
CIBW_ARCHS_WINDOWS: "AMD64 x86"
CIBW_BUILD: "*-win_amd64"
# Quote the whole value: quotes placed after the expression would become
# literal characters of the build pattern and match no build identifier.
CIBW_BUILD: "${{ matrix.python-version }}-win_amd64"
CIBW_SKIP: "pp* cp36-* cp37-* cp38-*"
CIBW_BUILD_VERBOSITY: 3
CIBW_ENVIRONMENT: "USE_LONG_INT=0 USE_UNSIGNED_LONG_INT=0 USE_CBLAS=0 USE_CUDA=1 CUDA_DYNAMIC_LOADING=1"
Expand All @@ -95,12 +103,12 @@ jobs:
# Do not enable compiling with CUDA for macOS, since NVIDIA no longer supports CUDA on macOS.
# Build wheel for macos X86_64.
# Note that wheels for macos ARM64 will be built on cirrus ci (see /tools/ci)
- name: Build wheels in mac without CUDA support (X86_64)
- name: Build wheels in mac without CUDA (X86_64) Python ${{ matrix.python-version }}
if: matrix.os == 'macos-latest'
uses: pypa/[email protected]
env:
CIBW_ARCHS_MACOS: "x86_64"
CIBW_BUILD: "*-macosx_x86_64"
# Quote the whole value: quotes placed after the expression would become
# literal characters of the build pattern and match no build identifier.
CIBW_BUILD: "${{ matrix.python-version }}-macosx_x86_64"
CIBW_SKIP: "pp* cp36-* cp37-* cp38-*"
CIBW_BUILD_VERBOSITY: 3
CIBW_ENVIRONMENT: "USE_LONG_INT=0 USE_UNSIGNED_LONG_INT=0 USE_CBLAS=0 USE_CUDA=0"
Expand All @@ -121,15 +129,15 @@ jobs:
# uploaded to pypi (pypi has 100MB upload limit). On the down side,
# the user has to install cuda library themselves.
# Note that wheels for linux AARCH64 will be built on cirrus ci (see /tools/ci)
- name: Build wheels in linux with CUDA support (X86_64)
- name: Build wheels in linux with CUDA (X86_64) Python ${{ matrix.python-version }}
if: matrix.os == 'ubuntu-latest'
uses: pypa/[email protected]
env:
CIBW_MANYLINUX_X86_64_IMAGE: sameli/manylinux2014_x86_64_cuda_12.2
# CIBW_MANYLINUX_X86_64_IMAGE: manylinux2014
# CIBW_BEFORE_ALL_LINUX: chmod +x .github/scripts/install_cuda.sh && .github/scripts/install_cuda.sh
CIBW_ARCHS_LINUX: "x86_64"
CIBW_BUILD: "*-manylinux_x86_64"
# Quote the whole value: quotes placed after the expression would become
# literal characters of the build pattern and match no build identifier.
CIBW_BUILD: "${{ matrix.python-version }}-manylinux_x86_64"
CIBW_SKIP: "pp* cp36-* cp37-* cp38-*"
CIBW_BUILD_VERBOSITY: 3
CIBW_ENVIRONMENT: "USE_LONG_INT=0 USE_UNSIGNED_LONG_INT=0 USE_CBLAS=0 USE_CUDA=1 CUDA_DYNAMIC_LOADING=1 CUDA_HOME=/usr/local/cuda"
Expand Down
141 changes: 141 additions & 0 deletions imate/_random_generator/highres_time_stamp.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
/*
* SPDX-FileCopyrightText: Copyright 2021, Siavash Ameli <[email protected]>
* SPDX-License-Identifier: BSD-3-Clause
* SPDX-FileType: SOURCE
*
* This program is free software: you can redistribute it and/or modify it
* under the terms of the license found in the LICENSE.txt file in the root
* directory of this source tree.
*/


// =======
// Headers
// =======

#include "./highres_time_stamp.h"  // get_highres_time_stamp, uint64_t

// The following macros select the time source at compile time. After this
// section, exactly one of the following holds:
//
//   - USE_QUERY_PERFORMANCE_COUNTER is defined (Windows),
//   - USE_CLOCK_GETTIME is defined (POSIX timers, or MacOS >= 10.12),
//   - none of them is defined, and <ctime> is included for the std fallback.
#if defined(WIN32) || defined(_WIN32) || defined(__WIN32__)

    // Use Windows API. Note that unistd.h is deliberately not included on
    // this branch, since it is a POSIX header that MSVC does not provide.
    #include <windows.h>  // LARGE_INTEGER, QueryPerformanceCounter
    #define USE_QUERY_PERFORMANCE_COUNTER

#else

    // On POSIX systems, unistd.h is the header that defines _POSIX_TIMERS.
    #include <unistd.h>  // _POSIX_TIMERS

    #if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0)

        // POSIX timers compliant (Linux)
        #include <time.h>  // timespec, clock_gettime
        #define USE_CLOCK_GETTIME

    #elif defined(__APPLE__)

        // MacOS version 10.12 and above has clock_gettime, but _POSIX_TIMERS
        // is not defined as a positive value there. So, the availability of
        // this function is decided from the minimum deployment target.
        #include <TargetConditionals.h>
        #if TARGET_OS_MAC
            #include <Availability.h>
            #if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__)
                #if __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 101200

                    // macOS Sierra (10.12) or later. Use clock_gettime
                    #include <time.h>  // timespec, clock_gettime
                    #define USE_CLOCK_GETTIME

                    // Define CLOCK_REALTIME in case it is not defined
                    #if !defined(CLOCK_REALTIME)
                        #define CLOCK_REALTIME 0
                    #endif

                #endif
            #endif
        #endif

    #endif

#endif

// Use neither clock_gettime nor QueryPerformanceCounter. Rather, fall back to
// std functions. This check is made after all the branches above, so that
// every path that did not select a high-resolution source (including MacOS
// older than 10.12) gets the declarations of std::time and std::clock. Note
// that std::clock has very low resolution in Windows.
#if !defined(USE_QUERY_PERFORMANCE_COUNTER) && !defined(USE_CLOCK_GETTIME)
    #include <ctime>  // std::time, std::clock
#endif


// ======================
// Get HighRes Time Stamp
// ======================

/// This function returns a high-resolution time stamp to be used to seed a
/// random generating function (see split_mix_64.cpp). The purpose of the
/// high-resolution counter is that if the random-generating function is
/// called subsequently in very short intervals (about a nano-second), the
/// seed values should be distinct.
///
/// The time source is selected at compile time:
///
/// * If ``USE_CLOCK_GETTIME`` is defined (POSIX timers, or MacOS 10.12 and
///   above), ``clock_gettime(CLOCK_REALTIME)`` is used, and the result is the
///   number of nano-seconds since the epoch. The conversion is carried out
///   with 64-bit integer arithmetic. Floating-point arithmetic must not be
///   used here: a double has 53 bits of mantissa, which at the present
///   magnitude of the nano-second count rounds the result to multiples of
///   256 and defeats the purpose of a nano-second counter.
///
/// * If ``USE_QUERY_PERFORMANCE_COUNTER`` is defined (Windows), the value of
///   ``QueryPerformanceCounter`` is used. ``std::clock()`` is not suitable on
///   Windows due to its low resolution.
///
/// * Otherwise, this function falls back to the sum of ``std::time`` and
///   ``std::clock``.
///
/// Note that this function works without C++11.
///
/// An alternative approach is to use ``rdtsc()``, but its use is discouraged,
/// since it is far less portable and many processors (like the ARM64
/// architecture) do not support it. Unlike rdtsc, this function works on both
/// X86_64 and ARM64.
///
/// \return  The time stamp as an unsigned 64-bit integer. If the operating
///          system call fails, ``1`` is returned and the program continues
///          without raising an error.

uint64_t get_highres_time_stamp(void)
{
    uint64_t time_stamp = 0;

    #if defined(USE_CLOCK_GETTIME)

        // Using POSIX clock_gettime
        struct timespec now;
        if (clock_gettime(CLOCK_REALTIME, &now) == -1)
        {
            // One means there is an error, but the program continues without
            // raising an error. The content of "now" is not valid in this
            // case and must not be read.
            time_stamp = 1;
        }
        else
        {
            // Integer arithmetic keeps the full nano-second resolution.
            const uint64_t nanoseconds_per_second =
                static_cast<uint64_t>(1000000000);

            time_stamp =
                static_cast<uint64_t>(now.tv_sec) * nanoseconds_per_second +
                static_cast<uint64_t>(now.tv_nsec);
        }

    #elif defined(USE_QUERY_PERFORMANCE_COUNTER)

        // Using Windows API for query performance counter
        LARGE_INTEGER ticks;
        if (!QueryPerformanceCounter(&ticks))
        {
            // One means there is an error (same convention as above)
            time_stamp = 1;
        }
        else
        {
            time_stamp = static_cast<uint64_t>(ticks.QuadPart);
        }

    #else

        // Use std::time and std::clock.
        // Note that std::time has low resolution with second accuracy. To
        // improve it, we add std::clock, which has high-resolution (in UNIX
        // only). In UNIX, std::clock is the process time with high
        // resolution. In Windows, note well that std::clock has low
        // resolution, and should be avoided.
        time_stamp = static_cast<uint64_t>(std::time(0)) +
                     static_cast<uint64_t>(std::clock());

    #endif

    return time_stamp;
}
31 changes: 31 additions & 0 deletions imate/_random_generator/highres_time_stamp.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/*
* SPDX-FileCopyrightText: Copyright 2021, Siavash Ameli <[email protected]>
* SPDX-License-Identifier: BSD-3-Clause
* SPDX-FileType: SOURCE
*
* This program is free software: you can redistribute it and/or modify it
* under the terms of the license found in the LICENSE.txt file in the root
* directory of this source tree.
*/


#ifndef _RANDOM_GENERATOR_HIGHRES_TIME_STAMP_H_
#define _RANDOM_GENERATOR_HIGHRES_TIME_STAMP_H_


// ======
// Header
// ======

#include <stdint.h> // uint64_t


// ============
// Declarations
// ============

// Get HighRes Time Stamp
//
// Returns a 64-bit unsigned time stamp obtained from a high-resolution time
// source. The value is used to seed the random generator (see
// split_mix_64.cpp). The function takes no arguments and is implemented in
// highres_time_stamp.cpp.
uint64_t get_highres_time_stamp(void);


#endif // _RANDOM_GENERATOR_HIGHRES_TIME_STAMP_H_
22 changes: 3 additions & 19 deletions imate/_random_generator/split_mix_64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,8 @@
// =======

#include "./split_mix_64.h"
#include "./highres_time_stamp.h" // get_highres_time_stamp
#include <cassert> // assert
// #include <ctime> // std::time
#include <chrono> // high_resolution_clock, duration_cast


// ===========
Expand All @@ -28,23 +27,8 @@

SplitMix64::SplitMix64()
{
// std::time gives the second since epoch. This, if this function is called
// multiple times a second, the std::time() results the same number. To
// make it differ between each milliseconds, the std::clock is added, which
// is the cpu time (in POSIX) or wall time (in windows) and in the unit of
// system's clocks per second.
// uint64_t seed = static_cast<uint64_t>(std::time(0)) +
// static_cast<uint64_t>(std::clock());

// Using the highest resolution clock, since we want to have distinct seed
// values if this function is called subsequently.
std::chrono::high_resolution_clock::time_point current_time = \
std::chrono::high_resolution_clock::now();

// Cast time point to 64-bit integer
uint64_t seed = static_cast<uint64_t>(
std::chrono::duration_cast<std::chrono::nanoseconds>(
current_time.time_since_epoch()).count());
// Seed the random generating algorithm with a high resolution time counter
uint64_t seed = get_highres_time_stamp();

// Seeding as follow only fills the first 32 bits of the 64-bit integer.
// Repeat the first 32 bits on the second 32-bits to create a better 64-bit
Expand Down
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -1696,6 +1696,8 @@ def run(self):
'Programming Language :: Python :: Implementation :: PyPy',
'Environment :: GPU :: NVIDIA CUDA',
'License :: OSI Approved :: BSD License',
'Operating System :: Unix',
'Operating System :: POSIX',
'Operating System :: POSIX :: Linux',
'Operating System :: Microsoft :: Windows',
'Operating System :: MacOS',
Expand Down

0 comments on commit 9f2753d

Please sign in to comment.