/*
 * ddot_strided -- strided dot product of two vectors, summed across processes.
 *
 * xy:     operand descriptor.  Reads xy->binop.x and xy->binop.y (the two
 *         vectors), xy->local_start / xy->local_stride (first index and step
 *         for this caller) and xy->binop.comm (communicator for the
 *         reduction).  Writes xy->binop.error, which is always set to 0.
 * result: receives the MPI_SUM reduction of every caller's partial sum.
 *
 * The partial sum covers indices start, start+stride, ... that are below
 * x->v.m.  MPI_Allreduce is collective, so every process in the communicator
 * must make this call.
 *
 * NOTE(review): assumes y holds at least x->v.m elements and that
 * local_stride is positive (a zero stride would never terminate the loop)
 * -- confirm against callers.
 */
void ddot_strided(LA_Dvector_ddot_stride_binop *xy, double *result)
{
    int i, start, stride, m;
    double local_sum = 0.0, sum, *x_data, *y_data;
    LA_Dvector *x, *y;

    /* Dereference the binary vector operands: */
    x = xy->binop.x;
    y = xy->binop.y;

    /* First index and step handled by this caller */
    start  = xy->local_start;
    stride = xy->local_stride;

    /* Sum up my part (non-optimized) */
    m = x->v.m;
    x_data = &(x->v.data[0]);
    y_data = &(y->v.data[0]);
    for (i = start; i < m; i += stride)
        local_sum += x_data[i] * y_data[i];

    /* Get the sum of all parts */
    MPI_Allreduce(&local_sum, &sum, 1, MPI_DOUBLE, MPI_SUM, xy->binop.comm);

    /* Return result */
    xy->binop.error = 0;
    *result = sum;
}

/*
 * ddot_strided_blas -- same contract as ddot_strided, but the local partial
 * sum is delegated to blas_ddot.
 *
 * xy:     operand descriptor (see ddot_strided for the fields used).
 * result: receives the MPI_SUM reduction of every caller's partial sum.
 *
 * The element count handed to BLAS is the number of indices
 * start, start+stride, ... that are below x->v.m, i.e. exactly the elements
 * the hand-written loop in ddot_strided visits.  Passing the full vector
 * length instead would make BLAS step past the end of both arrays.
 *
 * NOTE(review): assumes blas_ddot follows the Fortran-style BLAS DDOT
 * convention (arguments by reference, dot product returned as double) and
 * that x->v.m is an int -- confirm against the wrapper's declaration.
 */
void ddot_strided_blas(LA_Dvector_ddot_stride_binop *xy, double *result)
{
    int start, stride, m, n;
    double local_sum = 0.0, sum;
    LA_Dvector *x, *y;

    /* Dereference the binary vector operands: */
    x = xy->binop.x;
    y = xy->binop.y;

    /* First index and step handled by this caller */
    start  = xy->local_start;
    stride = xy->local_stride;

    /* Number of strided elements that fall inside the vector */
    m = x->v.m;
    n = 0;
    if (stride > 0 && start < m)
        n = (m - start - 1) / stride + 1;

    /* Sum up my part; keep the BLAS result so it takes part in the reduction */
    if (n > 0)
        local_sum = blas_ddot(&n, &(x->v.data[start]), &stride,
                              &(y->v.data[start]), &stride);

    /* Get the sum of all parts (collective: called even when n == 0) */
    MPI_Allreduce(&local_sum, &sum, 1, MPI_DOUBLE, MPI_SUM, xy->binop.comm);

    /* Return result */
    xy->binop.error = 0;
    *result = sum;
}