BLAS example
This example demonstrates SCALE's compatibility with cuBLAS APIs by using cuBLAS to perform a double-precision dot-product on an AMD GPU.
cuBLAS API calls are forwarded to the corresponding ROCm APIs.
Note that the example links to cublas in its CMakeLists.txt.
Example source code
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <vector>

#include <cublas_v2.h>
/**
 * Abort the program if a CUDA runtime call failed.
 *
 * @param error  Status returned by the CUDA runtime call.
 * @param file   Source file of the call site (normally __FILE__).
 * @param line   Line number of the call site (normally __LINE__).
 *
 * On failure the human-readable error string and the call site are written
 * to stderr (so they are not mixed into the example's regular output) and
 * the process exits with status 1. On cudaSuccess this is a no-op.
 */
void check(cudaError_t error, const char * file, size_t line) {
    if (error == cudaSuccess) {
        return;
    }

    std::cerr << "cuda error: " << cudaGetErrorString(error) << " at " << file << ":" << line << std::endl;
    std::exit(1);
}
/**
 * Abort the program if a cuBLAS call failed.
 *
 * @param error  Status returned by the cuBLAS call.
 * @param file   Source file of the call site (normally __FILE__).
 * @param line   Line number of the call site (normally __LINE__).
 *
 * On failure the cuBLAS status string and the call site are written to
 * stderr (so they are not mixed into the example's regular output) and the
 * process exits with status 1. On CUBLAS_STATUS_SUCCESS this is a no-op.
 */
void checkCublas(cublasStatus_t error, const char * file, size_t line) {
    if (error == CUBLAS_STATUS_SUCCESS) {
        return;
    }

    std::cerr << "cublas error: " << cublasGetStatusString(error) << " at " << file << ":" << line << std::endl;
    std::exit(1);
}
// Wraps a CUDA runtime call: passes its cudaError_t result to check() along
// with the file and line of the call site.
#define CHECK(error) check(error, __FILE__, __LINE__)
// Wraps a cuBLAS call: passes its cublasStatus_t result to checkCublas() along
// with the file and line of the call site.
#define CHECK_CUBLAS(error) checkCublas(error, __FILE__, __LINE__)
int main(int argc, char ** argv) {
cublasHandle_t handle;
CHECK_CUBLAS(cublasCreate(&handle));
const size_t N = 10;
const size_t BYTES = N * sizeof(double);
const double E = 1e-5;
/* Prepare the data */
std::vector<double> A(N);
std::vector<double> B(N);
for (size_t i = 0; i < N; i++) {
A[i] = i;
B[i] = i + N;
}
/* Send the data */
double * devA;
double * devB;
CHECK(cudaMalloc(&devA, BYTES));
CHECK(cudaMalloc(&devB, BYTES));
CHECK(cudaMemcpy(devA, A.data(), BYTES, cudaMemcpyHostToDevice));
CHECK(cudaMemcpy(devB, B.data(), BYTES, cudaMemcpyHostToDevice));
/* Calculate */
const int strideA = 1;
const int strideB = 1;
double result = 0;
CHECK_CUBLAS(cublasDdot(handle, A.size(), devA, strideA, devB, strideB, &result));
CHECK(cudaDeviceSynchronize());
double expected = 0;
for (size_t i = 0; i < N; i++) {
expected += A[i] * B[i];
}
if (std::abs(result - expected) > E) {
std::cout << "Result " << result << " is different from expected " << expected << std::endl;
}
CHECK_CUBLAS(cublasDestroy(handle));
std::cout << "Example finished." << std::endl;
return 0;
}
CMakeLists.txt used
# Build script for the BLAS example.
cmake_minimum_required(VERSION 3.17 FATAL_ERROR)
# CUDA is the only language enabled for this project.
project(example_blas LANGUAGES CUDA)
# One executable, built from the single source file blas.cu.
add_executable(example_blas blas.cu)
# Link against cublas (the example calls cublasCreate/cublasDdot/cublasDestroy)
# and redscale (presumably the SCALE runtime library -- confirm against the
# SCALE documentation).
target_link_libraries(example_blas PRIVATE cublas redscale)