// Per-lane cap that stats::weight() compares the float totals against.
Vec8f weight_cap_f;
// Per-lane factor applied to the part of each total that is at or below the cap.
Vec8f weights;
// Per-lane factor applied to the part of each total that exceeds the cap.
Vec8f cap_weights;
// Eight 16-bit inputs; stats::weight() loads all of them into one vector.
short val[8];
// Value that stats::weight() substitutes for element 0 of val.
short current_hit;
..... inline float stats::weight() {
// load val into vector
Vec8s val_s = Vec8s().load(val);
// replace first element by current_hit
Vec8s total_s = blend<0,9,10,11,12,13,14,15>(Vec8s(current_hit),
val_s);
// convert short to int
Vec8i total_i = Vec8i(extend_low(total_s),extend_high(total_s));
// convert to float
Vec8f total_f = to_float(total_i);
// find overcap values
Vec8f over_cap = select(total_f > weight_cap_f, weight_cap_f - total_f, 0.0f);
// weight it
Vec8f sum = ( total_f + over_cap ) * weights - over_cap * cap_weights;
// sum it all up
return horizontal_add( sum );
}
You may want to use the same type throughout to avoid the many type conversions, which are expensive. I haven't tested this code, but I think you get the idea.
And don't expect me to solve all your programming problems...