/*
 * Benchmark: scale a float vector in place (x = a*x) once with a plain C loop
 * and once with SSE intrinsics, print the CPU time of each run, and check
 * that both produce identical results.
 *
 * Build on x86:  cc -O2 -msse2 thisfile.c   (add -lrt on glibc older than 2.17)
 *
 * NOTE(review): this file was recovered from a copy in which every "<...>"
 * span had been stripped.  The header names, the tail of sapxy(), all of
 * sse_sapxy(), the benchmark arrays and the head of print_duration() were
 * reconstructed from the surviving call sites -- confirm against upstream.
 */

/* clock_gettime() and CLOCK_THREAD_CPUTIME_ID are POSIX, not ISO C. */
#ifndef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 200809L
#endif

/*
 * Other intrinsic headers, for reference: mmintrin.h (MMX), xmmintrin.h (SSE),
 * pmmintrin.h (SSE3), tmmintrin.h (SSSE3), smmintrin.h/nmmintrin.h (SSE4.1/4.2).
 */
#if defined(__SSE2__)
#include <emmintrin.h> /* SSE2 */
#endif
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

typedef unsigned long long ticks;

#if defined(__i386__) || defined(__x86_64__)
/*
 * Read the CPU time-stamp counter.
 *
 * CPUID is executed first as a serializing instruction.  It overwrites
 * eax, ebx, ecx and edx, so all four are listed as outputs; without that the
 * compiler may keep live values in registers the instruction destroys.
 *
 * Returns the 64-bit counter (edx:eax of RDTSC).
 */
static __inline__ ticks getticks(void)
{
    unsigned a, b, c, d;

    __asm__ __volatile__("cpuid"
                         : "=a"(a), "=b"(b), "=c"(c), "=d"(d)
                         : "a"(0)
                         : "memory");
    (void)b;
    (void)c;
    __asm__ __volatile__("rdtsc" : "=a"(a), "=d"(d));
    return ((ticks)a) | (((ticks)d) << 32);
}
#endif

/*
 * Plain version of x = a*x.
 *
 * n: number of elements in x (n <= 0 is a no-op)
 * a: scale factor
 * x: vector scaled in place
 */
void sapxy(int n, float a, float *x)
{
    for (int i = 0; i < n; i++) {
        x[i] = a * x[i];
    }
}

/*
 * SSE version of x = a*x: four floats per iteration, scalar loop for the
 * remaining n % 4 elements.
 *
 * Unaligned loads/stores are used so x may have any alignment.  When the
 * translation unit is built without SSE2 this degrades to the scalar loop.
 *
 * n: number of elements in x (n <= 0 is a no-op)
 * a: scale factor
 * x: vector scaled in place
 */
void sse_sapxy(int n, float a, float *x)
{
    int i = 0;

#if defined(__SSE2__)
    const __m128 scale = _mm_set1_ps(a);

    /* "n - i >= 4" rather than "i + 4 <= n": cannot overflow for any n. */
    for (; n - i >= 4; i += 4) {
        _mm_storeu_ps(x + i, _mm_mul_ps(scale, _mm_loadu_ps(x + i)));
    }
#endif
    for (; i < n; i++) {
        x[i] = a * x[i];
    }
}

/* Nanoseconds elapsed from *start to *end. */
static long long elapsed_ns(const struct timespec *start,
                            const struct timespec *end)
{
    long long ns = (long long)end->tv_nsec - (long long)start->tv_nsec;

    ns += (long long)(end->tv_sec - start->tv_sec) * 1000000000LL;
    return ns;
}

/*
 * Print the time elapsed between two clock readings.
 *
 * b: reading taken before the measured region
 * c: reading taken after the measured region
 */
void print_duration(const struct timespec *b, const struct timespec *c)
{
    printf("duration = %lld nanoseconds\n", elapsed_ns(b, c));
}

/*
 * Define SAPXY_NO_MAIN to compile only the kernels and helpers above,
 * e.g. to link them into a test harness that has its own main().
 */
#ifndef SAPXY_NO_MAIN

enum { VECTOR_LEN = 1000000 };

/* File scope: 4 MB each is too large to place on the stack. */
static float scalar_data[VECTOR_LEN];
static float simd_data[VECTOR_LEN];

/* Read the calling thread's CPU-time clock; abort the benchmark on failure. */
static void read_cpu_clock(struct timespec *ts)
{
    if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, ts) != 0) {
        perror("clock_gettime");
        exit(EXIT_FAILURE);
    }
}

/*
 * Time both kernels on identical input and compare the outputs.
 *
 * Returns 0 when every element matches, EXIT_FAILURE otherwise.
 */
int main(void)
{
    const float scale = 3.14159f;
    struct timespec begin, end;

    for (int i = 0; i < VECTOR_LEN; i++) {
        scalar_data[i] = (float)i;
        simd_data[i] = (float)i;
    }

    read_cpu_clock(&begin);
    sapxy(VECTOR_LEN, scale, scalar_data);
    read_cpu_clock(&end);
    print_duration(&begin, &end);

    read_cpu_clock(&begin);
    sse_sapxy(VECTOR_LEN, scale, simd_data);
    read_cpu_clock(&end);
    print_duration(&begin, &end);

    /*
     * Exact comparison is intentional: each element is a single IEEE-754
     * single-precision multiply in both kernels.
     */
    for (int i = 0; i < VECTOR_LEN; i++) {
        if (scalar_data[i] != simd_data[i]) {
            printf("Wrong.\n");
            return EXIT_FAILURE;
        }
    }
    printf("Correct, results matched.\n");
    return 0;
}

#endif /* SAPXY_NO_MAIN */