Add CPU to all files

4 years ago · 3be55fe2a1
parent ec130dddd5
commit 3be55fe2a1
22 changed files with 2569 additions and 115 deletions
--- a/fftma_module/gen/analysis.ipynb
+++ b/fftma_module/gen/analysis.ipynb
--- a/fftma_module/gen/include/geostat.h
+++ b/fftma_module/gen/include/geostat.h
@ -297,7 +297,7 @@ struct realization_mod {
 void axes(double* ap, double* scf, int N);
 /*cardsin covariance value for lag h*/
-double cardsin(double h);
+double cardsin(double h, int cores);
 /*Cholesky decomposition of matrix C                    */
 /* C     : symmetric positive-definite matrix recorded  */
@ -325,7 +325,7 @@ void coordinates(int maille, int i[3], struct grid_mod grid);
 /*variogram: structure defined above     */
 /*grid: structure defined above          */
 /*n: number of gridblocks along X,Y and Z*/
-void covariance(double* covar, struct vario_mod variogram, struct grid_mod grid, int n[3]);
+void covariance(double* covar, struct vario_mod variogram, struct grid_mod grid, int n[3], int cores);
 /*computation of the covariance matrix for the well data*/
 /*well coordinates are given as a number of cells       */
@ -355,10 +355,10 @@ void cov_matrix(double* C, struct vario_mod variogram, struct welldata_mod well,
 /*dj: distance along the Y axis                        */
 /*dk: distance along the Z axis                        */
 /* The returned value is the computed covariance value */
-double cov_value(struct vario_mod variogram, double di, double dj, double dk);
+double cov_value(struct vario_mod variogram, double di, double dj, double dk, int cores);
 /*cubic covariance value for lag h*/
-double cubic(double h);
+double cubic(double h, int cores);
 /*truncation of the power spectrum to remove     */
 /*high frequencies - isotropic case              */
@ -401,7 +401,7 @@ double exponential(double h);
 /*workr: utility real part vector for storage         */
 /*worki: utility imaginary part vector for storage    */
 /*The transformed data are returned in datar and datai*/
-void fourt(double* datar, double* datai, int nn[3], int ndim, int ifrwd, int icplx, double* workr, double* worki);
+void fourt(double* datar, double* datai, int nn[3], int ndim, int ifrwd, int icplx, double* workr, double* worki, int cores);
 /*calculates F(x) = (1/a)*exp(-x*x/2)*/
 double funtrun1(double x);
@ -410,7 +410,7 @@ double funtrun1(double x);
 float G(float x);
 /*gamma covariance value for lag h and exponent alpha*/
-double gammf(double h, double alpha);
+double gammf(double h, double alpha, int cores);
 /*returns the value ln(G(x))*/
 float gammln(float xx);
@ -422,7 +422,7 @@ float gammp(float a, float x);
 /*and unit variance, using ran2(idum) as the source */
 /*of uniform deviates                               */
 /*idum: seed                                        */
-double gasdev(long* idum, long* idum2, long* iy, long* iv);
+double gasdev(long* idum, long* idum2, long* iy, long* iv, int cores);
 /*gaussian covariance value for lag h*/
 double gaussian(double h);
@ -477,7 +477,7 @@ void gradual(struct grad_mod grad, float* Zo, float* Z, float* Zfinal, int n, st
 /*n: vector with the number of cells along the     */
 /*   X, Y and Z axes for the underlying grid       */
 /*   i = [0 1 2]                                   */
-void cgrid(struct vario_mod variogram, struct grid_mod grid, int n[3]);
+void cgrid(struct vario_mod variogram, struct grid_mod grid, int n[3], int cores);
 /*incomplete gamma function evaluated by its series*/
 /*representation as gamser, also returns ln(G(a))  */
@ -502,7 +502,7 @@ void krig_stat(float* b, int n, struct vario_mod variogram, struct welldata_mod
 /*i: considered direction                             */
 /*scf: correlation length                             */
 /*ap: normalized anisotropy axes                      */
-int length(int N, int i, double* scf, double* ap, double D, int Nvari);
+int length(int N, int i, double* scf, double* ap, double D, int Nvari, int cores);
 /*calculates L.Z*/
 /* L     : lower triangular matrix recorded            */
@ -516,7 +516,7 @@ int length(int N, int i, double* scf, double* ap, double D, int Nvari);
 void LtimeZ(double* L, float* Z, float* b, int n);
 /*determines the greatest prime factor of an integer*/
-int maxfactor(int n);
+int maxfactor(int n, int cores);
 /*metrop returns a boolean variable that issues a      */
 /*verdict on whether to accept a reconfiguration       */
@ -542,7 +542,7 @@ double power(double h, double alpha);
 /*generates uniform deviates between 0 and 1*/
 /*idum: seed                                */
-double ran2(long* idum, long* idum2, long* iy, long* iv);
+double ran2(long* idum, long* idum2, long* iy, long* iv, int cores);
 /*calculates bt.b                */
 /* b : vector, bi, i = [0...n-1] */
--- a/fftma_module/gen/include/toolsFFTMA.h
+++ b/fftma_module/gen/include/toolsFFTMA.h
@ -36,7 +36,7 @@
 /*realout4: structure defining a yvelocity field */
 /*realout5: structure defining a zvelocity field */
-void FFTMA2(struct vario_mod variogram, struct grid_mod grid, int n[3], struct realization_mod* realin, struct realization_mod* realout);
+void FFTMA2(struct vario_mod variogram, struct grid_mod grid, int n[3], struct realization_mod* realin, struct realization_mod* realout, int cores);
 /* prebuild_gwn      */
 /* Produce a first construction in real space of the Gaussian white noise */
@ -51,7 +51,7 @@ void FFTMA2(struct vario_mod variogram, struct grid_mod grid, int n[3], struct r
 /*        must be a Gaussian white noise       */
 /*realization: structure defining a realization*/
-void prebuild_gwn(struct grid_mod grid, int n[3], struct realization_mod* realin, double* realization, int solver);
+void prebuild_gwn(struct grid_mod grid, int n[3], struct realization_mod* realin, double* realization, int solver, int cores);
 /* build_real   */
 /* build a realization in the spectral domain */
@ -65,7 +65,7 @@ void prebuild_gwn(struct grid_mod grid, int n[3], struct realization_mod* realin
 /*realization: vector defining the real part   */
 /*ireal: vector defining the i-part   */
-void build_real(int n[3], int NTOT, double* covar, double* realization, double* ireal);
+void build_real(int n[3], int NTOT, double* covar, double* realization, double* ireal, int cores);
 void clean_real(struct realization_mod* realin, int n[3], struct grid_mod grid, double* vectorresult, struct realization_mod* realout);
--- a/fftma_module/gen/lib_src/Py_kgeneration.c
+++ b/fftma_module/gen/lib_src/Py_kgeneration.c
@ -46,7 +46,7 @@ void Py_kgeneration(long seed, struct grid_mod grid, struct statistic_mod stat,
    generate(&seed, N, Z, cores);
    /*FFTMA*/
-    FFTMA2(variogram, grid, n, Z, Y);
+    FFTMA2(variogram, grid, n, Z, Y, cores);
    /* make a log normal realization */
    if (stat.type == 1 || stat.type == 2) {
--- a/fftma_module/gen/lib_src/build_real.c
+++ b/fftma_module/gen/lib_src/build_real.c
@ -1,5 +1,6 @@
 #include "geostat.h"
 #include "log.h"
 #include "memory.h"
 #include <math.h>
 #include <stdarg.h>
 #include <stddef.h>
@ -20,7 +21,7 @@
 /*realization: vector defining the real part   */
 /*ireal: vector defining the i-part   */
-void build_real(int n[3], int NTOT, double* covar, double* realization, double* ireal) {
+void build_real(int n[3], int NTOT, double* covar, double* realization, double* ireal, int cores) {
    double* used_ram_t0 = malloc(sizeof(double));
    getVirtualMemUsed(used_ram_t0);
@ -30,6 +31,13 @@ void build_real(int n[3], int NTOT, double* covar, double* realization, double*
    double temp;
    log_info("RESULT = in progress, NTOT = %d, covar = %f, n[0] = %d, n[1] = %d, n[2] = %d", NTOT, *covar, n[0], n[1], n[2]);
    struct cpustat initial[cores];
    struct cpustat final[cores];
    for (int i = 0; i < cores; i++) {
        get_stats(&initial[i], i - 1);
    }
    /*decomposition and multiplication in the spectral domain*/
    for (k = 1; k <= n[2]; k++) {
        for (j = 1; j <= n[1]; j++) {
@ -50,6 +58,14 @@ void build_real(int n[3], int NTOT, double* covar, double* realization, double*
    t = clock() - t;
    double time_taken = ((double)t)/CLOCKS_PER_SEC; // calculate the elapsed time
    for (int i = 0; i < cores; i++) {
        get_stats(&final[i], i - 1);
    }
    for (int i = 0; i < cores; i++) {
        log_info("CPU %d: %lf%%\n", i, calculate_load(&initial[i], &final[i]));
    }
    double* used_ram_tf = malloc(sizeof(double));
    getVirtualMemUsed(used_ram_tf);
--- a/fftma_module/gen/lib_src/cardsin.c
+++ b/fftma_module/gen/lib_src/cardsin.c
@ -1,17 +1,25 @@
 #include "genlib.h"
 #include "log.h"
 #include "memory.h"
 #include <math.h>
 #include <stdio.h>
 #include <time.h>
 /*cardsin covariance function*/
-double cardsin(double h) {
+double cardsin(double h, int cores) {
    double* used_ram_t0 = malloc(sizeof(double));
    getVirtualMemUsed(used_ram_t0);
    clock_t t = clock();
    log_info("RESULT = in progress, h = %f", h);
    struct cpustat initial[cores];
    struct cpustat final[cores];
    for (int i = 0; i < cores; i++) {
        get_stats(&initial[i], i - 1);
    }
    float delta = 20.371;
    double z;
@ -26,6 +34,14 @@ double cardsin(double h) {
    t = clock() - t;
    double time_taken = ((double)t)/CLOCKS_PER_SEC; // calculate the elapsed time
    for (int i = 0; i < cores; i++) {
        get_stats(&final[i], i - 1);
    }
    for (int i = 0; i < cores; i++) {
        log_info("CPU %d: %lf%%\n", i, calculate_load(&initial[i], &final[i]));
    }
    double* used_ram_tf = malloc(sizeof(double));
    getVirtualMemUsed(used_ram_tf);
--- a/fftma_module/gen/lib_src/cgrid.c
+++ b/fftma_module/gen/lib_src/cgrid.c
@ -1,5 +1,6 @@
 #include "geostat.h"
 #include "log.h"
 #include "memory.h"
 #include <stdlib.h>
 #include <time.h>
@ -11,7 +12,7 @@
 /*n: vector with the number of cells along the     */
 /*   X, Y and Z axes for the underlying grid       */
 /*   i = [0 1 2]                                   */
-void cgrid(struct vario_mod variogram, struct grid_mod grid, int n[3]) {
+void cgrid(struct vario_mod variogram, struct grid_mod grid, int n[3], int cores) {
    double* used_ram_t0 = malloc(sizeof(double));
    getVirtualMemUsed(used_ram_t0);
@ -22,6 +23,12 @@ void cgrid(struct vario_mod variogram, struct grid_mod grid, int n[3]) {
    log_info("RESULT = in progress");
    struct cpustat initial[cores];
    struct cpustat final[cores];
    for (int i = 0; i < cores; i++) {
        get_stats(&initial[i], i - 1);
    }
    if (n == NULL || n[0] == 0 || n[1] == 0 || n[2] == 0) {
        for (i = 0; i < 3; i++) {
@ -39,7 +46,7 @@ void cgrid(struct vario_mod variogram, struct grid_mod grid, int n[3]) {
                D = grid.DZ;
                break;
            }
-            n[i] = length(N, i, variogram.scf, variogram.ap, D, variogram.Nvario);
+            n[i] = length(N, i, variogram.scf, variogram.ap, D, variogram.Nvario, cores);
        }
    } else {
        if ((n[0] < grid.NX) || (n[1] < grid.NY) || (n[2] < grid.NZ)) {
@ -50,6 +57,14 @@ void cgrid(struct vario_mod variogram, struct grid_mod grid, int n[3]) {
    t = clock() - t;
    double time_taken = ((double)t)/CLOCKS_PER_SEC; // calculate the elapsed time
    for (int i = 0; i < cores; i++) {
        get_stats(&final[i], i - 1);
    }
    for (int i = 0; i < cores; i++) {
        log_info("CPU %d: %lf%%\n", i, calculate_load(&initial[i], &final[i]));
    }
    double* used_ram_tf = malloc(sizeof(double));
    getVirtualMemUsed(used_ram_tf);
--- a/fftma_module/gen/lib_src/clean_real.c
+++ b/fftma_module/gen/lib_src/clean_real.c
@ -1,5 +1,6 @@
 #include "geostat.h"
 #include "log.h"
 #include "memory.h"
 #include <math.h>
 #include <stdarg.h>
 #include <stddef.h>
@ -8,7 +9,7 @@
 #include <string.h>
 #include <time.h>
-void clean_real(struct realization_mod* realin, int n[3], struct grid_mod grid, double* vectorresult, struct realization_mod* realout) {
+void clean_real(struct realization_mod* realin, int n[3], struct grid_mod grid, double* vectorresult, struct realization_mod* realout, int cores) {
    double* used_ram_t0 = malloc(sizeof(double));
    getVirtualMemUsed(used_ram_t0);
@ -23,6 +24,13 @@ void clean_real(struct realization_mod* realin, int n[3], struct grid_mod grid,
    log_info("RESULT = in progress, NTOT = %f", NTOT);
    struct cpustat initial[cores];
    struct cpustat final[cores];
    for (int i = 0; i < cores; i++) {
        get_stats(&initial[i], i - 1);
    }
    if (realout->vector == NULL || realout->n != realin->n) {
        realout->vector = (double*)malloc(realin->n * sizeof(double));
        if (realout->vector == NULL) {
@ -51,6 +59,14 @@ void clean_real(struct realization_mod* realin, int n[3], struct grid_mod grid,
    double* used_ram_tf = malloc(sizeof(double));
    getVirtualMemUsed(used_ram_tf);
    for (int i = 0; i < cores; i++) {
        get_stats(&final[i], i - 1);
    }
    for (int i = 0; i < cores; i++) {
        log_info("CPU %d: %lf%%\n", i, calculate_load(&initial[i], &final[i]));
    }
    log_info("RESULT = success, ELAPSED = %f seconds, DIF USED VIRTUAL MEM = %5.1f MB", time_taken, *used_ram_tf - *used_ram_t0);
    free(used_ram_t0);
--- a/fftma_module/gen/lib_src/cov_value.c
+++ b/fftma_module/gen/lib_src/cov_value.c
@ -1,11 +1,12 @@
 #include "genlib.h"
 #include "geostat.h"
 #include "log.h"
 #include "memory.h"
 #include <math.h>
 #include <time.h>
 /*selection of model covariance*/
-double cov_value(struct vario_mod variogram, double di, double dj, double dk) {
+double cov_value(struct vario_mod variogram, double di, double dj, double dk, int cores) {
    double* used_ram_t0 = malloc(sizeof(double));
    getVirtualMemUsed(used_ram_t0);
@ -13,6 +14,13 @@ double cov_value(struct vario_mod variogram, double di, double dj, double dk) {
    log_info("RESULT = in progress, di = %f, dj = %f, dk = %f", di, dj, dk);
    struct cpustat initial[cores];
    struct cpustat final[cores];
    for (int i = 0; i < cores; i++) {
        get_stats(&initial[i], i - 1);
    }
    double hx, hy, hz, h;
    double cov;
    int k;
@ -37,16 +45,16 @@ double cov_value(struct vario_mod variogram, double di, double dj, double dk) {
            cov += variogram.var[k] * spherical(h);
            break;
        case 4:
-            cov += variogram.var[k] * cardsin(h);
+            cov += variogram.var[k] * cardsin(h, cores);
            break;
        case 5:
            cov += variogram.var[k] * stable(h, variogram.alpha[k]);
            break;
        case 6:
-            cov += variogram.var[k] * gammf(h, variogram.alpha[k]);
+            cov += variogram.var[k] * gammf(h, variogram.alpha[k], cores);
            break;
        case 7:
-            cov += variogram.var[k] * cubic(h);
+            cov += variogram.var[k] * cubic(h, cores);
            break;
        case 8:
            cov += variogram.var[k] * nugget(h);
@ -60,6 +68,14 @@ double cov_value(struct vario_mod variogram, double di, double dj, double dk) {
    t = clock() - t;
    double time_taken = ((double)t)/CLOCKS_PER_SEC; // calculate the elapsed time
    for (int i = 0; i < cores; i++) {
        get_stats(&final[i], i - 1);
    }
    for (int i = 0; i < cores; i++) {
        log_info("CPU %d: %lf%%\n", i, calculate_load(&initial[i], &final[i]));
    }
    double* used_ram_tf = malloc(sizeof(double));
    getVirtualMemUsed(used_ram_tf);
--- a/fftma_module/gen/lib_src/covariance.c
+++ b/fftma_module/gen/lib_src/covariance.c
@ -1,10 +1,11 @@
 #include "geostat.h"
 #include "log.h"
 #include "memory.h"
 #include <time.h>
 /*builds the sampled covariance function*/
 /*dimensions are even*/
-void covariance(double* covar, struct vario_mod variogram, struct grid_mod mesh, int n[3]) {
+void covariance(double* covar, struct vario_mod variogram, struct grid_mod mesh, int n[3], int cores) {
    double* used_ram_t0 = malloc(sizeof(double));
    getVirtualMemUsed(used_ram_t0);
@ -15,6 +16,13 @@ void covariance(double* covar, struct vario_mod variogram, struct grid_mod mesh,
    log_info("RESULT = in progress");
    struct cpustat initial[cores];
    struct cpustat final[cores];
    for (int i = 0; i < cores; i++) {
        get_stats(&initial[i], i - 1);
    }
    for (i = 0; i < 3; i++)
        n2[i] = n[i] / 2;
@ -27,7 +35,7 @@ void covariance(double* covar, struct vario_mod variogram, struct grid_mod mesh,
                di = (double)i * mesh.DX;
                dj = (double)j * mesh.DY;
                dk = (double)k * mesh.DZ;
-                covar[maille] = (double)cov_value(variogram, di, dj, dk);
+                covar[maille] = (double)cov_value(variogram, di, dj, dk, cores);
                if (k > 0 && k < n2[2] && j > 0 && j < n2[1] && i > 0 && i < n2[0]) {
                    /*area 2*/
@ -41,7 +49,7 @@ void covariance(double* covar, struct vario_mod variogram, struct grid_mod mesh,
                    dj = (double)j * mesh.DY;
                    dk = (double)k * mesh.DZ;
                    maille = 1 + (n[0] - i) + n[0] * (j + n[1] * k);
-                    covar[maille] = (double)cov_value(variogram, di, dj, dk);
+                    covar[maille] = (double)cov_value(variogram, di, dj, dk, cores);
                }
                if (k > 0 && k < n2[2] && j > 0 && j < n2[1]) {
@ -56,7 +64,7 @@ void covariance(double* covar, struct vario_mod variogram, struct grid_mod mesh,
                    dj = -(double)j * mesh.DY;
                    dk = (double)k * mesh.DZ;
                    maille = 1 + (n[0] - i) + n[0] * (n[1] - j + n[1] * k);
-                    covar[maille] = (double)cov_value(variogram, di, dj, dk);
+                    covar[maille] = (double)cov_value(variogram, di, dj, dk, cores);
                }
                if (k > 0 && k < n2[2]) {
@ -71,7 +79,7 @@ void covariance(double* covar, struct vario_mod variogram, struct grid_mod mesh,
                    dj = -(double)j * mesh.DY;
                    dk = (double)k * mesh.DZ;
                    maille = 1 + i + n[0] * (n[1] - j + n[1] * k);
-                    covar[maille] = (double)cov_value(variogram, di, dj, dk);
+                    covar[maille] = (double)cov_value(variogram, di, dj, dk, cores);
                }
                if (k > 0 && k < n2[2] && i > 0 && i < n2[0]) {
@ -86,6 +94,14 @@ void covariance(double* covar, struct vario_mod variogram, struct grid_mod mesh,
    t = clock() - t;
    double time_taken = ((double)t)/CLOCKS_PER_SEC; // calculate the elapsed time
    for (int i = 0; i < cores; i++) {
        get_stats(&final[i], i - 1);
    }
    for (int i = 0; i < cores; i++) {
        log_info("CPU %d: %lf%%\n", i, calculate_load(&initial[i], &final[i]));
    }
    double* used_ram_tf = malloc(sizeof(double));
    getVirtualMemUsed(used_ram_tf);
--- a/fftma_module/gen/lib_src/cubic.c
+++ b/fftma_module/gen/lib_src/cubic.c
@ -1,11 +1,12 @@
 #include "genlib.h"
 #include "log.h"
 #include "memory.h"
 #include <math.h>
 #include <stdio.h>
 #include <time.h>
 /*cubic covariance function*/
-double cubic(double h) {
+double cubic(double h, int cores) {
    double* used_ram_t0 = malloc(sizeof(double));
    getVirtualMemUsed(used_ram_t0);
@ -13,6 +14,13 @@ double cubic(double h) {
    log_info("RESULT = in progress, h = %f", h);
    struct cpustat initial[cores];
    struct cpustat final[cores];
    for (int i = 0; i < cores; i++) {
        get_stats(&initial[i], i - 1);
    }
    double z;
    if (h >= 1.) {
@ -24,6 +32,14 @@ double cubic(double h) {
    t = clock() - t;
    double time_taken = ((double)t)/CLOCKS_PER_SEC; // calculate the elapsed time
    for (int i = 0; i < cores; i++) {
        get_stats(&final[i], i - 1);
    }
    for (int i = 0; i < cores; i++) {
        log_info("CPU %d: %lf%%\n", i, calculate_load(&initial[i], &final[i]));
    }
    double* used_ram_tf = malloc(sizeof(double));
    getVirtualMemUsed(used_ram_tf);
--- a/fftma_module/gen/lib_src/fftma2.c
+++ b/fftma_module/gen/lib_src/fftma2.c
@ -1,5 +1,6 @@
 #include "geostat.h"
 #include "log.h"
 #include "memory.h"
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
@ -23,7 +24,7 @@
 /*output:                                      */
 /*realout: structure defining a realization -  */
-void FFTMA2(struct vario_mod variogram, struct grid_mod grid, int n[3], struct realization_mod* realin, struct realization_mod* realout) {
+void FFTMA2(struct vario_mod variogram, struct grid_mod grid, int n[3], struct realization_mod* realin, struct realization_mod* realout, int cores) {
    double* used_ram_t0 = malloc(sizeof(double));
    getVirtualMemUsed(used_ram_t0);
@ -31,6 +32,13 @@ void FFTMA2(struct vario_mod variogram, struct grid_mod grid, int n[3], struct r
    log_info("RESULT = in progress");
    struct cpustat initial[cores];
    struct cpustat final[cores];
    for (int i = 0; i < cores; i++) {
        get_stats(&initial[i], i - 1);
    }
    int NTOT, i, j, k, NMAX, NDIM, ntot, nmax, NXYZ, nxyz;
    int solver;
    double temp;
@ -40,7 +48,7 @@ void FFTMA2(struct vario_mod variogram, struct grid_mod grid, int n[3], struct r
    axes(variogram.ap, variogram.scf, variogram.Nvario);
    /*pseudo-grid definition*/
-    cgrid(variogram, grid, n);
+    cgrid(variogram, grid, n, cores);
    /*constant definition*/
    NTOT = n[0] * n[1] * n[2];
@ -74,20 +82,20 @@ void FFTMA2(struct vario_mod variogram, struct grid_mod grid, int n[3], struct r
    testmemory(worki);
    /*covariance function creation*/
-    covariance(covar, variogram, grid, n);
+    covariance(covar, variogram, grid, n, cores);
    /*power spectrum*/
-    fourt(covar, ireal, n, NDIM, 1, 0, workr, worki);
+    fourt(covar, ireal, n, NDIM, 1, 0, workr, worki, cores);
    /*organization of the input Gaussian white noise*/
    solver = 0;
-    prebuild_gwn(grid, n, realin, realization, solver);
+    prebuild_gwn(grid, n, realin, realization, solver, cores);
    /*forward fourier transform of the GWN*/
-    fourt(realization, ireal, n, NDIM, 1, 0, workr, worki);
+    fourt(realization, ireal, n, NDIM, 1, 0, workr, worki, cores);
    /* build realization in spectral domain   */
-    build_real(n, NTOT, covar, realization, ireal);
+    build_real(n, NTOT, covar, realization, ireal, cores);
    double* used_ram_tf = malloc(sizeof(double));
    getVirtualMemUsed(used_ram_tf);
@ -95,18 +103,26 @@ void FFTMA2(struct vario_mod variogram, struct grid_mod grid, int n[3], struct r
    free(covar);
    /*backward fourier transform*/
-    fourt(realization, ireal, n, NDIM, 0, 1, workr, worki);
+    fourt(realization, ireal, n, NDIM, 0, 1, workr, worki, cores);
    free(ireal);
    free(workr);
    free(worki);
    /*output realization*/
-    clean_real(realin, n, grid, realization, realout);
+    clean_real(realin, n, grid, realization, realout, cores);
    t = clock() - t;
    double time_taken = ((double)t)/CLOCKS_PER_SEC; // calculate the elapsed time
    for (int i = 0; i < cores; i++) {
        get_stats(&final[i], i - 1);
    }
    for (int i = 0; i < cores; i++) {
        log_info("CPU %d: %lf%%\n", i, calculate_load(&initial[i], &final[i]));
    }
    log_info("RESULT = success, NTOT = %d, NMAX = %d, NDIM = %d, ntot = %d, nmax = %d, NXYZ = %d, nxyz = %d, ELAPSED = %f seconds, DIF USED VIRTUAL MEM = %5.1f MB", NTOT, NMAX, NDIM, ntot, nmax, NXYZ, nxyz, time_taken, *used_ram_tf - *used_ram_t0);
    free(used_ram_t0);
--- a/fftma_module/gen/lib_src/fourt.c
+++ b/fftma_module/gen/lib_src/fourt.c
@ -1,4 +1,5 @@
 #include "log.h"
 #include "memory.h"
 #include <math.h>
 #include <stdio.h>
 #include <time.h>
@ -91,13 +92,21 @@
 /*     PROGRAM MODIFIED FROM A SUBROUTINE OF BRENNER                       */
 /*     10-06-2000, MLR                                                     */
-void fourt(double* datar, double* datai, int nn[3], int ndim, int ifrwd, int icplx, double* workr, double* worki) {
+void fourt(double* datar, double* datai, int nn[3], int ndim, int ifrwd, int icplx, double* workr, double* worki, int cores) {
    double* used_ram_t0 = malloc(sizeof(double));
    getVirtualMemUsed(used_ram_t0);
    clock_t t = clock();
    log_info("RESULT = in progress");
    struct cpustat initial[cores];
    struct cpustat final[cores];
    for (int i = 0; i < cores; i++) {
        get_stats(&initial[i], i - 1);
    }
    int ifact[21], ntot, idim, np1, n, np2, m, ntwo, iff, idiv, iquot, irem, inon2, non2p, np0, nprev, icase, ifmin, i, j, jmax, np2hf, i2, i1max, i3, j3, i1, ifp1, ifp2, i2max, i1rng, istep, imin, imax, mmax, mmin, mstep, j1, j2max, j2, jmin, j3max, nhalf;
    double theta, wstpr, wstpi, wminr, wmini, wr, wi, wtemp, thetm, wmstr, wmsti, twowr, sr, si, oldsr, oldsi, stmpr, stmpi, tempr, tempi, difi, difr, sumr, sumi, TWOPI = 6.283185307179586476925286766559;
@ -595,6 +604,14 @@ L920:
    double* used_ram_tf = malloc(sizeof(double));
    getVirtualMemUsed(used_ram_tf);
    for (int i = 0; i < cores; i++) {
        get_stats(&final[i], i - 1);
    }
    for (int i = 0; i < cores; i++) {
        log_info("CPU %d: %lf%%\n", i, calculate_load(&initial[i], &final[i]));
    }
    log_info("RESULT = success, ELAPSED = %f, DIF USED VIRTUAL MEM = %5.1f MB", time_taken, *used_ram_tf - *used_ram_t0);
--- a/fftma_module/gen/lib_src/gammf.c
+++ b/fftma_module/gen/lib_src/gammf.c
@ -1,11 +1,12 @@
 #include "genlib.h"
 #include "log.h"
 #include "memory.h"
 #include <math.h>
 #include <stdio.h>
 #include <time.h>
 /*gamma covariance function*/
-double gammf(double h, double alpha) {
+double gammf(double h, double alpha, int cores) {
    double* used_ram_t0 = malloc(sizeof(double));
    getVirtualMemUsed(used_ram_t0);
@ -13,6 +14,13 @@ double gammf(double h, double alpha) {
    log_info("RESULT = in progress, h = %f, alpha = %f", h, alpha);
    struct cpustat initial[cores];
    struct cpustat final[cores];
    for (int i = 0; i < cores; i++) {
        get_stats(&initial[i], i - 1);
    }
    float delta;
    double z;
@ -22,6 +30,14 @@ double gammf(double h, double alpha) {
    t = clock() - t;
    double time_taken = ((double)t)/CLOCKS_PER_SEC; // calculate the elapsed time
    for (int i = 0; i < cores; i++) {
        get_stats(&final[i], i - 1);
    }
    for (int i = 0; i < cores; i++) {
        log_info("CPU %d: %lf%%\n", i, calculate_load(&initial[i], &final[i]));
    }
    double* used_ram_tf = malloc(sizeof(double));
    getVirtualMemUsed(used_ram_t0);
--- a/fftma_module/gen/lib_src/gasdev.c
+++ b/fftma_module/gen/lib_src/gasdev.c
@ -1,11 +1,12 @@
 #include "genlib.h"
 #include "log.h"
 #include "memory.h"
 #include <math.h>
 #include <time.h>
 #define NTAB 32
-double gasdev(long* idum, long* idum2, long* iy, long iv[NTAB]) {
+double gasdev(long* idum, long* idum2, long* iy, long iv[NTAB], int cores) {
    /*returns a normally distributed deviate with 0 mean*/
    /*and unit variance, using ran2(idum) as the source */
    /*of uniform deviates                               */
@ -16,15 +17,22 @@ double gasdev(long* idum, long* idum2, long* iy, long iv[NTAB]) {
    log_info("RESULT = in progress, idum = %f, idum2 = %f, iy = %f", *idum, *idum2, *iy);
-    double ran2(long* idum, long* idum2, long* iy, long iv[NTAB]);
+    struct cpustat initial[cores];
    struct cpustat final[cores];
    for (int i = 0; i < cores; i++) {
        get_stats(&initial[i], i - 1);
    }
    double ran2(long* idum, long* idum2, long* iy, long iv[NTAB], int cores);
    static int iset = 0;
    static double gset;
    double fac, rsq, v1, v2;
    if (iset == 0) {
        do {
-            v1 = 2.0 * ran2(idum, idum2, iy, iv) - 1.0;
+            v1 = 2.0 * ran2(idum, idum2, iy, iv, cores) - 1.0;
-            v2 = 2.0 * ran2(idum, idum2, iy, iv) - 1.0;
+            v2 = 2.0 * ran2(idum, idum2, iy, iv, cores) - 1.0;
            rsq = v1 * v1 + v2 * v2;
        } while (rsq >= 1.0 || rsq == 0.0);
@ -35,6 +43,14 @@ double gasdev(long* idum, long* idum2, long* iy, long iv[NTAB]) {
        t = clock() - t;
        double time_taken = ((double)t)/CLOCKS_PER_SEC; // calculate the elapsed time
        for (int i = 0; i < cores; i++) {
            get_stats(&final[i], i - 1);
        }
        for (int i = 0; i < cores; i++) {
            log_info("CPU %d: %lf%%\n", i, calculate_load(&initial[i], &final[i]));
        }
        double* used_ram_tf = malloc(sizeof(double));
        getVirtualMemUsed(used_ram_tf);
@ -50,6 +66,14 @@ double gasdev(long* idum, long* idum2, long* iy, long iv[NTAB]) {
        t = clock() - t;
        double time_taken = ((double)t)/CLOCKS_PER_SEC; // calculate the elapsed time
        for (int i = 0; i < cores; i++) {
            get_stats(&final[i], i - 1);
        }
        for (int i = 0; i < cores; i++) {
            log_info("CPU %d: %lf%%\n", i, calculate_load(&initial[i], &final[i]));
        }
        double* used_ram_tf = malloc(sizeof(double));
        getVirtualMemUsed(used_ram_tf);
--- a/fftma_module/gen/lib_src/generate.c
+++ b/fftma_module/gen/lib_src/generate.c
@ -49,7 +49,7 @@ void generate(long* seed, int n, struct realization_mod* realization, int cores)
    /*Gaussian white noise generation*/
    for (i = 0; i < n; i++)
-        (*realization).vector[i] = gasdev(seed, &idum2, &iy, iv);
+        (*realization).vector[i] = gasdev(seed, &idum2, &iy, iv, cores);
    t = clock() - t;
    double time_taken = ((double)t)/CLOCKS_PER_SEC; // calculate the elapsed time
--- a/fftma_module/gen/lib_src/geostat.h
+++ b/fftma_module/gen/lib_src/geostat.h
@ -298,7 +298,7 @@ struct realization_mod {
 void axes(double* ap, double* scf, int N);
 /*cardsin covariance value for lag h*/
-double cardsin(double h);
+double cardsin(double h, int cores);
 /*Cholesky decomposition of matrix C                    */
 /* C     : symmetric positive-definite matrix recorded  */
@ -326,7 +326,7 @@ void coordinates(int maille, int i[3], struct grid_mod grid);
 /*variogram: structure defined above     */
 /*grid: structure defined above          */
 /*n: number of gridblocks along X,Y and Z*/
-void covariance(double* covar, struct vario_mod variogram, struct grid_mod grid, int n[3]);
+void covariance(double* covar, struct vario_mod variogram, struct grid_mod grid, int n[3], int cores);
 /*computation of the covariance matrix for the well data*/
 /*well coordinates are given as a number of cells       */
@ -356,10 +356,10 @@ void cov_matrix(double* C, struct vario_mod variogram, struct welldata_mod well,
 /*dj: distance along the Y axis                        */
 /*dk: distance along the Z axis                        */
 /* The returned value is the computed covariance value */
-double cov_value(struct vario_mod variogram, double di, double dj, double dk);
+double cov_value(struct vario_mod variogram, double di, double dj, double dk, int cores);
 /*cubic covariance value for lag h*/
-double cubic(double h);
+double cubic(double h, int cores);
 /*truncation of the power spectrum to remove     */
 /*high frequencies - isotropic case              */
@ -402,7 +402,7 @@ double exponential(double h);
 /*workr: utility real part vector for storage         */
 /*worki: utility imaginary part vector for storage    */
 /*The transformed data are returned in datar and datai*/
-void fourt(double* datar, double* datai, int nn[3], int ndim, int ifrwd, int icplx, double* workr, double* worki);
+void fourt(double* datar, double* datai, int nn[3], int ndim, int ifrwd, int icplx, double* workr, double* worki, int cores);
 /*calculates F(x) = (1/a)*exp(-x*x/2)*/
 double funtrun1(double x);
@ -411,7 +411,7 @@ double funtrun1(double x);
 float G(float x);
 /*gamma covariance value for lag h and exponent alpha*/
-double gammf(double h, double alpha);
+double gammf(double h, double alpha, int cores);
 /*returns the value ln(G(x))*/
 float gammln(float xx);
@ -423,7 +423,7 @@ float gammp(float a, float x);
 /*and unit variance, using ran2(idum) as the source */
 /*of uniform deviates                               */
 /*idum: seed                                        */
-double gasdev(long* idum, long* idum2, long* iy, long* iv);
+double gasdev(long* idum, long* idum2, long* iy, long* iv, int cores);
 /*gaussian covariance value for lag h*/
 double gaussian(double h);
@ -478,7 +478,7 @@ void gradual(struct grad_mod grad, float* Zo, float* Z, float* Zfinal, int n, st
 /*n: vector with the number of cells along the     */
 /*   X, Y and Z axes for the underlying grid       */
 /*   i = [0 1 2]                                   */
-void cgrid(struct vario_mod variogram, struct grid_mod grid, int n[3]);
+void cgrid(struct vario_mod variogram, struct grid_mod grid, int n[3], int cores);
 /*incomplete gamma function evaluated by its series*/
 /*representation as gamser, also returns ln(G(a))  */
@ -503,7 +503,7 @@ void krig_stat(float* b, int n, struct vario_mod variogram, struct welldata_mod
 /*i: considered direction                             */
 /*scf: correlation length                             */
 /*ap: normalized anisotropy axes                      */
-int length(int N, int i, double* scf, double* ap, double D, int Nvari);
+int length(int N, int i, double* scf, double* ap, double D, int Nvari, int cores);
 /*calculates L.Z*/
 /* L     : lower triangular matrix recorded            */
@ -517,7 +517,7 @@ int length(int N, int i, double* scf, double* ap, double D, int Nvari);
 void LtimeZ(double* L, float* Z, float* b, int n);
 /*determines the greatest prime factor of an integer*/
-int maxfactor(int n);
+int maxfactor(int n, int cores);
 /*metrop returns a boolean variable that issues a      */
 /*verdict on whether to accept a reconfiguration       */
@ -543,7 +543,7 @@ double power(double h, double alpha);
 /*generates uniform deviates between 0 and 1*/
 /*idum: seed                                */
-double ran2(long* idum, long* idum2, long* iy, long* iv);
+double ran2(long* idum, long* idum2, long* iy, long* iv, int cores);
 /*calculates bt.b                */
 /* b : vector, bi, i = [0...n-1] */
--- a/fftma_module/gen/lib_src/length.c
+++ b/fftma_module/gen/lib_src/length.c
@ -1,9 +1,10 @@
 #include "log.h"
 #include "memory.h"
 #include <math.h>
 #include <time.h>
 /* compute the length for one dimension*/
-int length(int N, int i, double* scf, double* ap, double D, int Nvari) {
+int length(int N, int i, double* scf, double* ap, double D, int Nvari, int cores) {
    double* used_ram_t0 = malloc(sizeof(double));
    getVirtualMemUsed(used_ram_t0);
@ -11,7 +12,14 @@ int length(int N, int i, double* scf, double* ap, double D, int Nvari) {
    log_info("RESULT = in progress, N = %d, i = %d, D = %f, Nvari = %d", N, i, D, Nvari);
-    int maxfactor(int n);
+    struct cpustat initial[cores];
    struct cpustat final[cores];
    for (int i = 0; i < cores; i++) {
        get_stats(&initial[i], i - 1);
    }
    int maxfactor(int n, int cores);
    double temp1, temp2;
    int n, j, k, nmax;
    int nlimit = 13;
@ -37,16 +45,24 @@ int length(int N, int i, double* scf, double* ap, double D, int Nvari) {
        }
        if ((n % 2) != 0)
            n = n + 1;
-        nmax = maxfactor(n);
+        nmax = maxfactor(n, cores);
        while (nmax > nlimit) {
            n += 2;
-            nmax = maxfactor(n);
+            nmax = maxfactor(n, cores);
        }
    }
    t = clock() - t;
    double time_taken = ((double)t)/CLOCKS_PER_SEC; // calculate the elapsed time
    for (int i = 0; i < cores; i++) {
        get_stats(&final[i], i - 1);
    }
    for (int i = 0; i < cores; i++) {
        log_info("CPU %d: %lf%%\n", i, calculate_load(&initial[i], &final[i]));
    }
    double* used_ram_tf = malloc(sizeof(double));
    getVirtualMemUsed(used_ram_tf);
--- a/fftma_module/gen/lib_src/maxfactor.c
+++ b/fftma_module/gen/lib_src/maxfactor.c
@ -1,8 +1,9 @@
 #include "genlib.h"
 #include "log.h"
 #include "memory.h"
 /*determines the greatest prime factor of an integer*/
-int maxfactor(int n) {
+int maxfactor(int n, int cores) {
    double* used_ram_t0 = malloc(sizeof(double));
    getVirtualMemUsed(used_ram_t0);
@ -10,6 +11,13 @@ int maxfactor(int n) {
    log_info("RESULT = in progress, n = %d", n);
    struct cpustat initial[cores];
    struct cpustat final[cores];
    for (int i = 0; i < cores; i++) {
        get_stats(&initial[i], i - 1);
    }
    int test_fact(int* pnum, int fact, int* pmaxfac);
    int lnum, fact;
    int maxfac;
@ -42,6 +50,14 @@ int maxfactor(int n) {
    t = clock() - t;
    double time_taken = ((double)t)/CLOCKS_PER_SEC; // calculate the elapsed time
    for (int i = 0; i < cores; i++) {
        get_stats(&final[i], i - 1);
    }
    for (int i = 0; i < cores; i++) {
        log_info("CPU %d: %lf%%\n", i, calculate_load(&initial[i], &final[i]));
    }
    double* used_ram_tf = malloc(sizeof(double));
    getVirtualMemUsed(used_ram_tf);
--- a/fftma_module/gen/lib_src/memory.c
+++ b/fftma_module/gen/lib_src/memory.c
@ -86,6 +86,8 @@ double calculate_load(struct cpustat *prev, struct cpustat *cur) {
    double idled = (double) idle_cur - (double) idle_prev;
    if (totald == 0 && idled == 0) return 0; 
    double cpu_perc = (1000 * (totald - idled) / totald + 1) / 10;
    return cpu_perc;
--- a/fftma_module/gen/lib_src/prebuild_gwn.c
+++ b/fftma_module/gen/lib_src/prebuild_gwn.c
@ -1,5 +1,6 @@
 #include "geostat.h"
 #include "log.h"
 #include "memory.h"
 #include <math.h>
 #include <stdarg.h>
 #include <stddef.h>
@ -21,7 +22,7 @@
 /*        must be a Gaussian white noise       */
 /*realization: structure defining a realization*/
-void prebuild_gwn(struct grid_mod grid, int n[3], struct realization_mod* realin, double* realization, int solver) {
+void prebuild_gwn(struct grid_mod grid, int n[3], struct realization_mod* realin, double* realization, int solver, int cores) {
    double* used_ram_t0 = malloc(sizeof(double));
    getVirtualMemUsed(used_ram_t0);
@ -32,6 +33,13 @@ void prebuild_gwn(struct grid_mod grid, int n[3], struct realization_mod* realin
    log_info("RESULT = in progress, n[0] = %d, n[1] = %d, n[2] = %d, solver = %d", n[0], n[1], n[2], solver);
    struct cpustat initial[cores];
    struct cpustat final[cores];
    for (int i = 0; i < cores; i++) {
        get_stats(&initial[i], i - 1);
    }
    ntot = n[0] * n[1] * n[2];
    realization[0] = 0.;
    if (solver == 1) {
@ -57,6 +65,14 @@ void prebuild_gwn(struct grid_mod grid, int n[3], struct realization_mod* realin
    t = clock() - t;
    double time_taken = ((double)t)/CLOCKS_PER_SEC; // calculate the elapsed time
    for (int i = 0; i < cores; i++) {
        get_stats(&final[i], i - 1);
    }
    for (int i = 0; i < cores; i++) {
        log_info("CPU %d: %lf%%\n", i, calculate_load(&initial[i], &final[i]));
    }
    double* used_ram_tf = malloc(sizeof(double));
    getVirtualMemUsed(used_ram_tf);
--- a/fftma_module/gen/lib_src/ran2.c
+++ b/fftma_module/gen/lib_src/ran2.c
@ -1,6 +1,7 @@
 #include <time.h>
 #include "genlib.h"
 #include "log.h"
 #include "memory.h"
 #define IM1 2147483563
 #define IM2 2147483399
@ -17,7 +18,7 @@
 #define EPS 1.2e-7
 #define RNMX (1.0 - EPS)
-double ran2(long* idum, long* idum2, long* iy, long iv[NTAB]) {
+double ran2(long* idum, long* idum2, long* iy, long iv[NTAB], int cores) {
    double* used_ram_t0 = malloc(sizeof(double));
    getVirtualMemUsed(used_ram_t0);
@ -29,6 +30,13 @@ double ran2(long* idum, long* idum2, long* iy, long iv[NTAB]) {
    log_info("RESULT = in progress");
    struct cpustat initial[cores];
    struct cpustat final[cores];
    for (int i = 0; i < cores; i++) {
        get_stats(&initial[i], i - 1);
    }
    if (*idum <= 0) {
        if (-(*idum) < 1)
            *idum = 1;
@ -63,6 +71,14 @@ double ran2(long* idum, long* idum2, long* iy, long iv[NTAB]) {
    t = clock() - t;
    double time_taken = ((double)t)/CLOCKS_PER_SEC; // calculate the elapsed time
    for (int i = 0; i < cores; i++) {
        get_stats(&final[i], i - 1);
    }
    for (int i = 0; i < cores; i++) {
        log_info("CPU %d: %lf%%\n", i, calculate_load(&initial[i], &final[i]));
    }
    double* used_ram_tf = malloc(sizeof(double));
    getVirtualMemUsed(used_ram_tf);