00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00028
00029 #include "toeplitz.h"
00030
00031
00032 typedef struct interval_node_struct {
00033 double lb, ub;
00034 integer eigs, prev_eigs, post_eigs;
00035 struct interval_node_struct *next;
00036 } interval_node;
00037
00038
00039 void calc_intervals(interval_node *ilist, integer *n, double *t, double *lbg, double *ubg, integer *maxEig_interval);
00040
00041
00042 typedef struct sort_aux_struct_ {
00043 T el;
00044 unsigned int index;
00045 } sort_aux_struct;
00046
00051 static int sort_eigenvalues_aux(const void *p1, const void *p2) {
00052 T e1 = ((sort_aux_struct *)p1)->el;
00053 T e2 = ((sort_aux_struct *)p2)->el;
00054
00055 if(e1 < e2) return -1;
00056 else if(e1 > e2) return 1;
00057 else return 0;
00058 }
00059
00068 void sort_eigenvalues(unsigned int extracted, unsigned int n, T *el, T *ev) {
00069
00070
00071 unsigned int i;
00072 sort_aux_struct sort[extracted];
00073 for(i=0; i<extracted; ++i) {
00074 sort[i].el = el[i];
00075 sort[i].index = i;
00076 }
00077
00078
00079 qsort(sort, extracted, sizeof(sort_aux_struct), sort_eigenvalues_aux);
00080
00081
00082 T *ev_aux = (T *) malloc(extracted * n * sizeof(T));
00083 memcpy(ev_aux, ev, extracted * n * sizeof(T));
00084
00085 for(i=0; i<extracted; ++i) {
00086 el[i] = sort[i].el;
00087 if(i != sort[i].index) memcpy(ev + i * n, ev_aux + sort[i].index * n, n * sizeof(T));
00088 }
00089
00090 free(ev_aux);
00091 }
00092
00105 interval_cpu_info *calc_eigen_intervals(T *t, unsigned int n, T lbg, T ubg, unsigned int max_eig_interval, unsigned int *num_intervals) {
00106
00107
00108 *num_intervals = 0;
00109
00110
00111 if(!t || !num_intervals || n == 0 || max_eig_interval == 0) return NULL;
00112
00113
00114 #ifdef SINGLE
00115 int i;
00116 double *t_double = (double *) malloc(n * sizeof(double));
00117 for(i=0; i<n; ++i) t_double[i] = t[i];
00118 #else
00119 double *t_double = t;
00120 #endif
00121
00122
00123 integer n_ = n;
00124 interval_node interval_list;
00125 double lbg_ = lbg, ubg_ = ubg;
00126 calc_intervals(&interval_list, &n_, t_double, &lbg_, &ubg_, (integer *) &max_eig_interval);
00127 #ifdef SINGLE
00128 free(t_double);
00129 #endif
00130
00131
00132 interval_node *i_node;
00133 for(i_node = interval_list.next; i_node; i_node = i_node->next, ++(*num_intervals));
00134
00135
00136 interval_cpu_info *interval_cpu = (interval_cpu_info *) calloc(*num_intervals, sizeof(interval_cpu_info));
00137 interval_cpu_info *i_cpu = interval_cpu;
00138
00139 for(i_node = interval_list.next; i_node; i_node = i_node->next, ++i_cpu) {
00140
00141
00142 i_cpu->lb = i_node->lb;
00143 i_cpu->ub = i_node->ub;
00144 i_cpu->num_eigs = i_node->eigs;
00145 }
00146
00147
00148 return interval_cpu;
00149 }
00150
00174 unsigned int extract_eigenvalues(unsigned int n, unsigned int k, unsigned int max_k, unsigned int p_lda,
00175 unsigned int *converged, unsigned int *extracted, unsigned int required,
00176 T a, T b, const T *t, const T *p, const T *alpha, const T *beta, T tolerance,
00177 T *l, T *v) {
00178
00179
00180 if(!converged || !extracted || !t || !p || !alpha || !beta || !l || !v) return 0;
00181 if(*extracted >= required) return 0;
00182 if(k > max_k) return 0;
00183
00184
00185 T *el = (T *) malloc(k * sizeof(T));
00186 T *ev = (T *) malloc(k * k * sizeof(T));
00187 T *work = (T *) malloc((2*k - 2) * sizeof(T));
00188 T *aux = (T *) malloc(max(k, n) * sizeof(T));
00189
00190
00191 {
00192 char jobz = 'V';
00193 integer n_ = k, ldz_ = k, info;
00194
00195 memcpy(el, alpha, k * sizeof(T));
00196 memcpy(aux, beta, k * sizeof(T));
00197
00198 _stev(&jobz, &n_, el, aux, ev, &ldz_, work, &info);
00199 if(info != 0) fprintf(stderr, "Error during tridiagonal eigenvalue extraction: %d elements did not converge\n", (int)info);
00200 }
00201
00202
00203 int i, j;
00204 T sigma = (a + b) * (T)0.5;
00205 for(i=0; i<k; ++i) el[i] = sigma + (T)1.0 / el[i];
00206
00207
00208 T *ritz = (T *) malloc(n * k * sizeof(T));
00209 _gemm(CblasColMajor, CblasNoTrans, CblasNoTrans, n, k, k, (T)1.0, p, p_lda, ev, k, (T)0.0, ritz, n);
00210
00211
00212 unsigned int extracted_now = 0;
00213 for(j=*converged; j<k; ++j) {
00214 if((j & 1) == 0) i = k - (j >> 1) - 1;
00215 else i = j >> 1;
00216
00217
00218 if(k >= max_k || beta[k-1] * (ev + k * i)[k-1] <= tolerance) {
00219
00220
00221 toeplitz_mult(n, t, ritz + i * n, aux);
00222 _axpy(n, -el[i], ritz + i * n, 1, aux, 1);
00223 T residual = _nrm2(n, aux, 1);
00224
00225
00226 if(residual <= tolerance) {
00227 ++(*converged);
00228 if(el[i] >= a && el[i] < b) {
00229 ++extracted_now;
00230 l[*extracted] = el[i];
00231 memcpy(v + *extracted * n, ritz + i * n, n * sizeof(T));
00232 if(++(*extracted) >= required) break;
00233 }
00234 }
00235
00236 } else break;
00237 }
00238
00239
00240 free(el); free(ev); free(ritz);
00241 free(work); free(aux);
00242
00243
00244 return extracted_now;
00245 }
00246