#include "toeplitz.h"
#include "gpu_vector_kernel.cu"
Go to the source code of this file.
Functions | |
__device__ void | memcpy4 (void *dest, void *src, unsigned int size) |
__device__ void | levinson (unsigned int n, T td, const T *t, T *b, T *x, T *work, T *swork) |
__device__ void | reorthogonalize (unsigned int n, unsigned int k_size, const T *p, T *v, T *h, T *swork) |
__device__ void | si2w (unsigned int n, T td, T *t, unsigned int min_k, unsigned int num_it, unsigned int max_k, T sigma, T *p, T *q, tridiagonal_entry *m, T *work) |
__global__ void | si2w_parallel (unsigned int n, T td, T *t, unsigned int k, unsigned int inc_k, unsigned int max_k, interval_gpu_info *exec_window) |
Definition in file toeplitz_kernel.cu.
__device__ void levinson | ( | unsigned int | n, | |
T | td, | |||
const T * | t, | |||
T * | b, | |||
T * | x, | |||
T * | work, | |||
T * | swork | |||
) |
Solve a symmetric Toeplitz system of the form toeplitz(t) * x = b
n | Size of [td t], x and b vectors (size of Toeplitz matrix). | |
td | Diagonal value of the symmetric Toeplitz matrix. | |
t | Vector defining the rest of symmetric Toeplitz matrix values. | |
b | Right-hand side of the system. Will be normalized by td. | |
x | Vector that will contain the solution of the system. | |
work | Work vector of size 2 * align(n). | |
swork | Shared memory workspace. Requires 6 * blockSize - 1 floats. |
Definition at line 71 of file toeplitz_kernel.cu.
__device__ void memcpy4 | ( | void * | dest, | |
void * | src, | |||
unsigned int | size | |||
) |
Perform a coalesced raw copy between aligned addresses, for data sizes that are a multiple of 4 bytes.
dest | Destination address. Assumed to be aligned to 16 * sizeof(float) if it points to global memory. | |
src | Source address. Assumed to be aligned to 16 * sizeof(float) if it points to global memory. | |
size | Data size in bytes. Should always be a multiple of 4. |
Definition at line 41 of file toeplitz_kernel.cu.
__device__ void reorthogonalize | ( | unsigned int | n, | |
unsigned int | k_size, | |||
const T * | p, | |||
T * | v, | |||
T * | h, | |||
T * | swork | |||
) |
Reorthogonalize a vector to a Krylov subspace base.
n | Size of v vector and number of rows of p matrix. | |
k_size | Current size of Krylov subspace. Number of columns of p matrix. | |
p | Pointer to a column-ordered matrix defining the base of the Krylov subspace. | |
v | Pointer to the vector to be reorthogonalized. | |
h | Work array of size k_size. | |
swork | Shared memory workspace. Requires 2 * blockSize floats. |
Definition at line 186 of file toeplitz_kernel.cu.
__device__ void si2w | ( | unsigned int | n, | |
T | td, | |||
T * | t, | |||
unsigned int | min_k, | |||
unsigned int | num_it, | |||
unsigned int | max_k, | |||
T | sigma, | |||
T * | p, | |||
T * | q, | |||
tridiagonal_entry * | m, | |||
T * | work | |||
) |
Shift and invert 2-way Lanczos routine for Toeplitz matrices. Calculates symmetric and skew-symmetric tridiagonal values, but leaves final eigenvalue calculation to CPU side.
n | Size of the symmetric Toeplitz matrix. | |
td | Main diagonal value of the symmetric Toeplitz matrix. | |
t | Remaining n-1 values from symmetric Toeplitz matrix. | |
min_k | Starting size of the Krylov subspace (starting value 0). Can be used to resume a previous extraction. | |
num_it | Number of iterations to perform in the Krylov subspace. | |
max_k | Maximum size of the Krylov subspace. Iterations will halt if reached. | |
sigma | Shift value for centering eigenvalue extraction (depends on extraction interval). | |
p | Symmetric Lanczos vectors of Krylov subspace (size align(n) x max_k). | |
q | Skew-symmetric Lanczos vectors of Krylov subspace (size align(n) x max_k). | |
m | Structure array containing values from symmetric and skew-symmetric tridiagonal matrices (size max_k). | |
work | Workspace array of size >= max(5 * align(n), 4 * align(n) + max_k). |
Definition at line 266 of file toeplitz_kernel.cu.
__global__ void si2w_parallel | ( | unsigned int | n, | |
T | td, | |||
T * | t, | |||
unsigned int | k, | |||
unsigned int | inc_k, | |||
unsigned int | max_k, | |||
interval_gpu_info * | exec_window | |||
) |
Entry point for parallel shift-and-invert 2-way Lanczos evaluation.
n | Size of the symmetric Toeplitz matrix. | |
td | Main diagonal value of the symmetric Toeplitz matrix. | |
t | Remaining n-1 values from symmetric Toeplitz matrix. | |
k | Current value of subspace size iterator. Real subspace size will be calculated using interval data. | |
inc_k | Number of iterations to perform in the Krylov subspace. | |
max_k | Maximum size of the Krylov subspace. Iterations will halt if reached. | |
exec_window | Pointer to execution window containing GPU-side descriptors of the intervals to be calculated. |
Definition at line 397 of file toeplitz_kernel.cu.