This document explains how to compile SFMT for users of UNIX-like systems (for example Linux, FreeBSD, Cygwin, OS X, etc.) working in a terminal. I can't help those who use an IDE (Integrated Development Environment); please see your IDE's help to learn how to use the SIMD features of your CPU.
Check if SFMT.c and Makefile are in your current directory. If not, cd to the directory where they exist. Then, type
make std
If it causes an error, try to type
cc -DMEXP=19937 -o test-std-M19937 test.c
or try to type
gcc -DMEXP=19937 -o test-std-M19937 test.c
If it succeeds, check the test program. Type
./test-std-M19937 -b32
You will see many random numbers displayed on your screen. If you want to check that these random numbers are the correct output, redirect the output to a file and diff it against SFMT.19937.out.txt, like this:
./test-std-M19937 -b32 > foo.txt
diff -w foo.txt SFMT.19937.out.txt
Silence means they are the same, because diff reports only the differences between the two files.
If you want to know the generation speed of SFMT, type
./test-std-M19937 -s
It is very slow. To make it fast, compile it with the -O3 option. If your compiler is gcc, you should also specify the -fno-strict-aliasing option together with -O3. Type
gcc -O3 -fno-strict-aliasing -DMEXP=19937 -o test-std-M19937 test.c
./test-std-M19937 -s
If your CPU supports SSE2 and you can use gcc version 3.4 or later, you can make test-sse2-Mxxx. To do this, type
make sse2
or type
gcc -O3 -msse2 -fno-strict-aliasing -DHAVE_SSE2=1 -DMEXP=19937 -o test-sse2-M19937 test.c
If everything works well,
./test-sse2-M19937 -s
shows much shorter time than test-std-M19937 -s.
If you are using a Macintosh computer with a PowerPC G4 or G5, and your gcc version is later than 3.3, you can make test-alti-M19937. To do this, type
make osx-alti
or type
gcc -O3 -faltivec -fno-strict-aliasing -DHAVE_ALTIVEC=1 -DMEXP=19937 -o test-alti-M19937 test.c
If everything works well,
./test-alti-M19937 -s
shows much shorter time than test-std-M19937 -s.
If you are using a CPU which supports AltiVec under Linux, use alti instead of osx-alti.
To make test program and check 32-bit output automatically for all supported MEXPs of SFMT, type
make std-check
To check the test program optimized for 64-bit output on a big-endian CPU, type
make big-check
To check test program optimized for SSE2, type
make sse2-check
To check test program optimized for OSX AltiVec, type
make osx-alti-check
To check test program optimized for OSX AltiVec and 64bit output, type
make osx-altibig-check
These commands may take some time.
Here is a very simple program, sample1.c, which calculates pi using the Monte Carlo method.
#include <stdio.h> #include <stdlib.h> #include "SFMT.h" int main(int argc, char* argv[]) { int i, cnt, seed; double x, y, pi; const int NUM = 10000; if (argc >= 2) { seed = strtol(argv[1], NULL, 10); } else { seed = 12345; } cnt = 0; init_gen_rand(seed); for (i = 0; i < NUM; i++) { x = genrand_res53(); y = genrand_res53(); if (x * x + y * y < 1.0) { cnt++; } } pi = (double)cnt / NUM * 4; printf("%lf\n", pi); return 0; }
To compile sample1.c with SFMT.c with the period of $2^{607}$, type
gcc -DMEXP=607 -o sample1 SFMT.c sample1.c
If your CPU is BIG ENDIAN you need to type
gcc -DMEXP=607 -DBIG_ENDIAN64 -o sample1 SFMT.c sample1.c
because genrand_res53() uses gen_rand64().
If your CPU supports SSE2 and you want to use optimized SFMT for SSE2, type
gcc -msse2 -DHAVE_SSE2 -DMEXP=607 -o sample1 SFMT.c sample1.c
If your CPU supports AltiVec and you want to use optimized SFMT for AltiVec, type
gcc -faltivec -DBIG_ENDIAN64 -DHAVE_ALTIVEC -DMEXP=607 -o sample1 SFMT.c sample1.c
Here is sample2.c, which modifies sample1.c. The block call fill_array64 is much faster than sequential calls, but it needs aligned memory. The standard function for obtaining aligned memory is posix_memalign, but it isn't available on every OS.
#include <stdio.h> #define _XOPEN_SOURCE 600 #include <stdlib.h> #include "SFMT.h" int main(int argc, char* argv[]) { int i, j, cnt, seed; double x, y, pi; const int NUM = 10000; const int R_SIZE = 2 * NUM; int size; uint64_t *array; if (argc >= 2) { seed = strtol(argv[1], NULL, 10); } else { seed = 12345; } size = get_min_array_size64(); if (size < R_SIZE) { size = R_SIZE; } #if defined(__APPLE__) || \ (defined(__FreeBSD__) && __FreeBSD__ >= 3 && __FreeBSD__ <= 6) printf("malloc used\n"); array = malloc(sizeof(double) * size); if (array == NULL) { printf("can't allocate memory.\n"); return 1; } #elif defined(_POSIX_C_SOURCE) printf("posix_memalign used\n"); if (posix_memalign((void **)&array, 16, sizeof(double) * size) != 0) { printf("can't allocate memory.\n"); return 1; } #elif defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3)) printf("memalign used\n"); array = memalign(16, sizeof(double) * size); if (array == NULL) { printf("can't allocate memory.\n"); return 1; } #else /* in this case, gcc doesn't support SSE2 */ printf("malloc used\n"); array = malloc(sizeof(double) * size); if (array == NULL) { printf("can't allocate memory.\n"); return 1; } #endif cnt = 0; j = 0; init_gen_rand(seed); fill_array64(array, size); for (i = 0; i < NUM; i++) { x = to_res53(array[j++]); y = to_res53(array[j++]); if (x * x + y * y < 1.0) { cnt++; } } free(array); pi = (double)cnt / NUM * 4; printf("%lf\n", pi); return 0; }
To compile sample2.c with SFMT.c with the period of $2^{2281}$, type
gcc -DMEXP=2281 -o sample2 SFMT.c sample2.c
or
gcc -DMEXP=2281 -DBIG_ENDIAN64 -o sample2 SFMT.c sample2.c
If your CPU supports SSE2 and you want to use optimized SFMT for SSE2, type
gcc -msse2 -DHAVE_SSE2 -DMEXP=2281 -o sample2 SFMT.c sample2.c
If your CPU supports AltiVec and you want to use optimized SFMT for AltiVec, type
gcc -faltivec -DHAVE_ALTIVEC -DMEXP=2281 -DBIG_ENDIAN64 -o sample2 SFMT.c sample2.c
or type
gcc -faltivec -DHAVE_ALTIVEC -DBIG_ENDIAN64 -DONLY64 -DMEXP=2281 -o sample2 SFMT.c sample2.c
The effect of the -DONLY64 option is as follows: when it is used, the executable can generate 64-bit integers faster, but 32-bit output is not supported.
Here is sample3.c, which modifies sample1.c. It is very similar to sample1.c; the difference is only one line. It includes "SFMT.c" instead of "SFMT.h".
#include <stdio.h> #include <stdlib.h> #include "SFMT.c" int main(int argc, char* argv[]) { int i, cnt, seed; double x, y, pi; const int NUM = 10000; if (argc >= 2) { seed = strtol(argv[1], NULL, 10); } else { seed = 12345; } cnt = 0; init_gen_rand(seed); for (i = 0; i < NUM; i++) { x = genrand_res53(); y = genrand_res53(); if (x * x + y * y < 1.0) { cnt++; } } pi = (double)cnt / NUM * 4; printf("%lf\n", pi); return 0; }
To compile sample3.c, type
gcc -DMEXP=1279 -o sample3 sample3.c
or
gcc -DMEXP=1279 -DBIG_ENDIAN64 -o sample3 sample3.c
If your CPU supports SSE2 and you want to use optimized SFMT for SSE2, then type
gcc -msse2 -DHAVE_SSE2 -DMEXP=1279 -o sample3 sample3.c
If your CPU supports AltiVec and you want to use optimized SFMT for AltiVec, type
gcc -faltivec -DHAVE_ALTIVEC -DBIG_ENDIAN64 -DMEXP=1279 -o sample3 sample3.c
or type
gcc -faltivec -DHAVE_ALTIVEC -DBIG_ENDIAN64 -DONLY64 -DMEXP=1279 -o sample3 sample3.c
Here is sample4.c, which modifies sample1.c. A 32-bit integer seed can produce only $2^{32}$ kinds of initial state. To avoid this problem, SFMT provides the init_by_array function. This sample uses init_by_array, which initializes the internal state array with an array of 32-bit integers. The array may be larger than the internal state array, and all of its elements are used for initialization, but an overly large array is wasteful.
#include <stdio.h> #include <string.h> #include "SFMT.h" int main(int argc, char* argv[]) { int i, cnt, seed_cnt; double x, y, pi; const int NUM = 10000; uint32_t seeds[100]; if (argc >= 2) { seed_cnt = 0; for (i = 0; (i < 100) && (i < strlen(argv[1])); i++) { seeds[i] = argv[1][i]; seed_cnt++; } } else { seeds[0] = 12345; seed_cnt = 1; } cnt = 0; init_by_array(seeds, seed_cnt); for (i = 0; i < NUM; i++) { x = genrand_res53(); y = genrand_res53(); if (x * x + y * y < 1.0) { cnt++; } } pi = (double)cnt / NUM * 4; printf("%lf\n", pi); return 0; }
To compile sample4.c, type
gcc -DMEXP=19937 -o sample4 SFMT.c sample4.c
or
gcc -DMEXP=19937 -DBIG_ENDIAN64 -o sample4 SFMT.c sample4.c
Now the seed can be a string, like this:
./sample4 your-full-name
Here is a list of C preprocessor definitions that users can specify to control code generation. Each macro must be given immediately after the -D compiler option (for example, -DMEXP=19937).
| | 32-bit output | LITTLE ENDIAN 64-bit output | BIG ENDIAN 64-bit output |
|---|---|---|---|
| required | MEXP | MEXP | MEXP, BIG_ENDIAN64 |
| optional | HAVE_SSE2, HAVE_ALTIVEC | HAVE_SSE2 | HAVE_ALTIVEC, ONLY64 |