项目作者: NLESC-JCER

项目描述 :
Offload Eigen operations to GPUs
高级语言: C++
项目地址: git://github.com/NLESC-JCER/EigenCuda.git
创建时间: 2018-12-21T17:44:22Z
项目社区:https://github.com/NLESC-JCER/EigenCuda

开源协议:Apache License 2.0

下载


DOI

EigenCuda

Offload Eigen3 matrix-matrix multiplication to an Nvidia GPU
using cuBLAS.

CMake Installation

To compile, execute:

  1. cmake -H. -Bbuild && cmake --build build

To compile in debug mode, execute:

  1. cmake -H. -Bbuild -DCMAKE_BUILD_TYPE=Debug && cmake --build build

Dependencies

This package assumes that you have installed the following packages: CMake, Eigen3, and the CUDA toolkit (which provides cuBLAS).

Usage

Matrix Multiplication

  1. #include "eigencuda.hpp"
  2. #include "cudapipeline.hpp"
  3. using eigencuda::CudaPipeline;
  4. using eigencuda::CudaMatrix;
  5. // Call the class to handle GPU resources
  6. CudaPipeline cuda_pip;
  7. Eigen::MatrixXd A = Eigen::MatrixXd::Zero(2, 2);
  8. Eigen::MatrixXd B = Eigen::MatrixXd::Zero(3, 2);
  9. Eigen::MatrixXd C = Eigen::MatrixXd::Zero(3, 2);
  10. Eigen::MatrixXd D = Eigen::MatrixXd::Zero(3, 2);
  11. Eigen::MatrixXd X = Eigen::MatrixXd::Zero(3, 2);
  12. Eigen::MatrixXd Y = Eigen::MatrixXd::Zero(3, 2);
  13. Eigen::MatrixXd Z = Eigen::MatrixXd::Zero(3, 2);
  14. // Define matrices
  15. A << 1., 2., 3., 4.;
  16. B << 5., 6., 7., 8., 9., 10.;
  17. C << 9., 10., 11., 12., 13., 14.;
  18. D << 13., 14., 15., 16., 17., 18.;
  19. X << 23., 34., 31., 46., 39., 58.;
  20. Y << 39., 58., 47., 70., 55., 82.;
  21. Z << 55., 82., 63., 94., 71., 106.;
  22. std::vector<Eigen::MatrixXd> tensor{B, C, D};
  23. std::vector<Eigen::MatrixXd> results(3, Eigen::MatrixXd::Zero(3, 2));
  24. CudaMatrix cuma_A{A, cuda_pip.get_stream()};
  25. CudaMatrix cuma_B{3, 2, cuda_pip.get_stream()};
  26. CudaMatrix cuma_C{3, 2, cuda_pip.get_stream()};
  27. for (Index i = 0; i < 3; i++) {
  28. cuma_B.copy_to_gpu(tensor[i]);
  29. cuda_pip.gemm(cuma_B, cuma_A, cuma_C);
  30. results[i] = cuma_C;
  31. }
  32. // Expected results
  33. bool pred_1 = X.isApprox(results[0]);
  34. bool pred_2 = Y.isApprox(results[1]);
  35. bool pred_3 = Z.isApprox(results[2]);