#include "toeplitz.h"
Go to the source code of this file.
Functions | |
__device__ T | norm (unsigned int n, const T *x, T *swork) |
__device__ T | dot (unsigned int n, const T *x, const T *y, T *swork) |
__device__ T | dot_reverse_y (unsigned int n, const T *x, const T *y, T *swork) |
__device__ void | axpy (unsigned int n, T a, const T *x, T *y) |
__device__ void | axpy_reverse_x (unsigned int n, T a, const T *x, T *y, T *swork) |
__device__ void | axpxb_reverse_x (unsigned int n, T a, const T *x, T *y, T b, T *swork) |
Definition in file gpu_vector_kernel.cu.
__device__ void axpxb_reverse_x(unsigned int n, T a, const T *x, T *y, T b, T *swork)
Performs y = b * (a * reverse(x) + x) operation.
n | Size of x vector. | |
a | Scalar factor. | |
x | Input vector. | |
y | Output vector. Can be x. | |
b | Result scaling value. | |
swork | Shared memory workspace. Requires 6 * blockSize - 1 floats. |
Definition at line 380 of file gpu_vector_kernel.cu.
__device__ void axpy(unsigned int n, T a, const T *x, T *y)
Performs y = a * x + y operation.
n | Size of x and y vectors. | |
a | Scalar factor. | |
x | First vector. | |
y | Second vector, modified with result. |
Definition at line 245 of file gpu_vector_kernel.cu.
__device__ void axpy_reverse_x(unsigned int n, T a, const T *x, T *y, T *swork)
Performs y = a * reverse(x) + y operation.
n | Size of x and y vectors. | |
a | Scalar factor. | |
x | First vector. | |
y | Second vector, modified with result. Cannot be x. | |
swork | Shared memory workspace. Requires 5 * blockSize - 1 floats. |
Definition at line 278 of file gpu_vector_kernel.cu.
__device__ T dot(unsigned int n, const T *x, const T *y, T *swork)
Calculates the dot product of x and y vectors.
n | Size of x and y vectors. | |
x | First vector. | |
y | Second vector. | |
swork | Shared memory workspace. Requires blockSize floats. |
Definition at line 110 of file gpu_vector_kernel.cu.
__device__ T dot_reverse_y(unsigned int n, const T *x, const T *y, T *swork)
Calculates the dot product of x and reverse(y) vectors.
n | Size of x and y vectors. | |
x | First vector. | |
y | Second vector. | |
swork | Shared memory workspace. Requires 4 * blockSize - 1 floats. |
Definition at line 163 of file gpu_vector_kernel.cu.
__device__ T norm(unsigned int n, const T *x, T *swork)
Calculates the 2-norm (length) of a given vector. Should be faster than sqrt(dot(x, x)) since it reads each component only once.
n | Size of x vector. | |
x | Input vector. | |
swork | Shared memory workspace. Requires blockSize floats. |
Definition at line 40 of file gpu_vector_kernel.cu.