From: Andi Kleen <andi@firstfloor.org>
To: cpw@sgi.com, linux-numa@vger.kernel.org
Subject: [PATCH] [5/15] Compile numademo with vectorization if available
Date:	Mon, 16 Jun 2008 14:00:20 +0200 (CEST)

Enable vectorization for the benchmark files if available. It looks like gcc
is able to vectorize all the interesting loops in stream at least.
Unfortunately it doesn't do any good on the Mersenne twister.

Also change BENCH_CFLAGS to -O3 just in case. gcc44 will actually auto-vectorize
at that level as well, and it might enable more aggressive optimizations in other
compilers (like icc) too.

---
 Makefile |   11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

Index: numactl-2.0.0/Makefile
===================================================================
--- numactl-2.0.0.orig/Makefile
+++ numactl-2.0.0/Makefile
@@ -3,7 +3,7 @@ CFLAGS :=  -g -Wall
 # these are used for the benchmarks in addition to the normal CFLAGS. 
 # Normally no need to overwrite unless you find a new magic flag to make
 # STREAM run faster.
-BENCH_CFLAGS := -O2 -ffast-math -funroll-loops
+BENCH_CFLAGS := -O3 -ffast-math -funroll-loops
 # for compatibility with old releases
 CFLAGS += ${OPT_CFLAGS}
 override CFLAGS += -I.
@@ -15,10 +15,17 @@ ifeq ($(THREAD_SUPPORT),no)
 	override CFLAGS += -D__thread=""
 endif
 
+# Find out if the compiler supports -ftree-vectorize (own variable, so THREAD_SUPPORT tested above is not overwritten)
+VECT_SUPPORT := $(shell touch empty.c ; if $(CC) $(CFLAGS) -c -ftree-vectorize empty.c -o empty.o \
+			>/dev/null 2>/dev/null ; then echo "yes" ; else echo "no"; fi)
+ifeq ($(VECT_SUPPORT),yes)
+	BENCH_CFLAGS += -ftree-vectorize
+endif
+
 CLEANFILES := numactl.o libnuma.o numactl numademo numademo.o distance.o \
 	      memhog libnuma.so libnuma.so.1 numamon numamon.o syscall.o bitops.o \
 	      memhog.o util.o stream_main.o stream_lib.o shm.o stream clearcache.o \
-	      test/pagesize test/tshared test/mynode.o test/tshared.o mt.o \
+	      test/pagesize test/tshared test/mynode.o test/tshared.o mt.o empty.o empty.c \
 	      test/mynode test/ftok test/prefered test/randmap \
 	      .depend .depend.X test/nodemap test/distance test/tbitmap \
 	      test/after test/before threadtest test_move_pages \
