## Análisis de la etapa de generación de medios

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np  

## Armado del dataset

En este paso parsearemos los archivos de log para obtener estadísticas sobre el tiempo que tarda cada ejecución de una función, sobre la memoria usada y sobre el uso de CPU (TODO: aún no implementado). Con esto buscamos identificar:
- Qué funciones son las que consumen mayor cantidad de memoria
- Qué funciones son las que tienen un mayor tiempo de procesamiento
- Qué funciones son las que son invocadas una mayor cantidad de veces

Una vez identificados estos puntos de análisis podemos proponer soluciones para mejorar estas estadísticas.

In [3]:
def get_function_name(function_name):
    """Extract the bare function name from a raw log token.

    The first 10 characters of ``function_name`` are discarded, and the
    text preceding the first ``".c"`` in the remainder is returned. If
    ``".c"`` does not occur, the whole remainder is returned.

    Args:
        function_name: Raw token taken from a log line.

    Returns:
        The trimmed name as a ``str`` (empty if the token has 10 or fewer
        characters, or if ``".c"`` immediately follows the prefix).
    """
    without_prefix = function_name[10:]
    base_name, _separator, _rest = without_prefix.partition(".c")
    return base_name

In [4]:
# Maps a function name to a list of other function names.
# NOTE(review): presumably caller -> callees in the profiled code; this
# mapping is not referenced by the cells visible here -- confirm it is
# still needed.
relations = dict(
    generate=["gasdev"],
    fftma2=["covariance", "fourt", "prebuild_gwn"],
)

In [28]:
def analyze(file_name):
    """Summarise per-function time and memory statistics from a profiling log.

    Every line of ``file_name`` is split on whitespace. Only lines that
    contain both a ``USED`` and an ``ELAPSED`` token are parsed; all other
    lines are ignored. For each parsed line:

    * the third token is passed to ``get_function_name`` to obtain the
      function name,
    * the memory value is the token four positions after ``USED``,
    * the elapsed time is the token two positions after ``ELAPSED``, with
      everything from the first comma onwards removed.

    The total number of lines read is printed once the file has been
    consumed.

    Parameters
    ----------
    file_name : str or path-like
        Path of the log file to read.

    Returns
    -------
    pandas.DataFrame
        One row per function name, with ``min``/``max``/``mean``/``sum``/
        ``count`` of ``time`` and ``min``/``max``/``median`` of ``memory``
        as two-level columns, sorted by total time in descending order.

    Raises
    ------
    ValueError
        If no line contains both markers (previously this surfaced as an
        opaque ``KeyError: 'function'`` from ``groupby``), or if a memory or
        elapsed token cannot be converted to ``float``.
    IndexError
        If a matching line has fewer tokens than the fixed offsets require.
    """
    used_offset = 4     # the memory value sits four tokens after "USED"
    elapsed_offset = 2  # the elapsed value sits two tokens after "ELAPSED"

    records = []
    line_count = 0
    with open(file_name) as log_file:
        # Stream the file instead of calling readlines(): the larger logs
        # contain millions of lines and do not need to be held in memory.
        for line in log_file:
            line_count += 1
            tokens = line.split()
            if "USED" not in tokens or "ELAPSED" not in tokens:
                continue

            memory_token = tokens[tokens.index("USED") + used_offset]
            elapsed_token = tokens[tokens.index("ELAPSED") + elapsed_offset]

            # TODO: add CPU
            records.append({
                "function": get_function_name(tokens[2]),
                "memory": float(memory_token),
                # The elapsed token may carry a trailing comma.
                "time": float(elapsed_token.split(",")[0]),
            })
    print(line_count)

    if not records:
        raise ValueError(
            f"no lines containing both 'USED' and 'ELAPSED' were found in {file_name!r}"
        )

    df = pd.DataFrame(records)
    df_grouped = df.groupby(['function']).agg({
        'time': ['min', 'max', 'mean', 'sum', 'count'],
        'memory': ['min', 'max', 'median'],
    })
    return df_grouped.sort_values(by=('time', 'sum'), ascending=False)

## N = 8

In [19]:
# Per-function summary for the N = 8 run, parsed from log_8.txt (relative path).
analyze('log_8.txt')

Unnamed: 0_level_0,memory,memory,memory,time,time,time,time,time
Unnamed: 0_level_1,min,max,median,min,max,mean,sum,count
function,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Py_kgeneration,2.9,2.9,2.9,0.091805,0.091805,0.091805,0.091805,1
generate,2.9,2.9,2.9,0.081707,0.081707,0.081707,0.081707,1
gasdev,0.0,0.5,0.0,0.0,0.005674,9.3e-05,0.047828,512
fftma2,0.0,0.0,0.0,0.007942,0.007942,0.007942,0.007942,1
covariance,0.0,0.0,0.0,0.007492,0.007492,0.007492,0.007492,1
ran2,0.0,0.5,0.0,0.0,1.8e-05,2e-06,0.001708,702
cov_value,0.0,0.0,0.0,0.0,3e-05,1e-06,0.000707,700
fourt,0.0,0.0,0.0,7.9e-05,0.000107,9.1e-05,0.000274,3
cgrid,0.0,0.0,0.0,6.7e-05,6.7e-05,6.7e-05,6.7e-05,1
length,0.0,0.0,0.0,8e-06,8e-06,8e-06,2.4e-05,3


## N = 16

In [20]:
# Per-function summary for the N = 16 run, parsed from log_16.txt (relative path).
analyze('log_16.txt')

Unnamed: 0_level_0,memory,memory,memory,time,time,time,time,time
Unnamed: 0_level_1,min,max,median,min,max,mean,sum,count
function,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Py_kgeneration,4.9,4.9,4.9,0.352931,0.352931,0.352931,0.352931,1
generate,4.2,4.2,4.2,0.319565,0.319565,0.319565,0.319565,1
gasdev,0.0,0.5,0.0,0.0,0.014654,3.830811e-05,0.15691,4096
fftma2,1.0,1.0,1.0,0.031217,0.031217,0.031217,0.031217,1
covariance,1.0,1.0,1.0,0.030104,0.030104,0.030104,0.030104,1
ran2,0.0,0.5,0.0,0.0,2.2e-05,1.03208e-06,0.005437,5268
cov_value,0.0,0.5,0.0,0.0,1.3e-05,8.762626e-07,0.003123,3564
fourt,0.0,0.0,0.0,0.000269,0.000371,0.0003073333,0.000922,3
cgrid,0.0,0.0,0.0,4.7e-05,4.7e-05,4.7e-05,4.7e-05,1
length,0.0,0.0,0.0,8e-06,1e-05,8.666667e-06,2.6e-05,3


## N = 32

In [21]:
# Per-function summary for the N = 32 run, parsed from log_32.txt (relative path).
analyze('log_32.txt')

Unnamed: 0_level_0,memory,memory,memory,time,time,time,time,time
Unnamed: 0_level_1,min,max,median,min,max,mean,sum,count
function,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Py_kgeneration,2.7,2.7,2.7,1.180846,1.180846,1.180846,1.180846,1
generate,4.9,4.9,4.9,0.840075,0.840075,0.840075,0.840075,1
gasdev,-1.3,0.5,0.0,0.0,0.003243,1.513522e-05,0.495951,32768
fftma2,-2.2,-2.2,-2.2,0.338882,0.338882,0.338882,0.338882,1
covariance,-2.5,-2.5,-2.5,0.330282,0.330282,0.330282,0.330282,1
ran2,-0.3,0.5,0.0,0.0,0.000103,9.740325e-07,0.040473,41552
cov_value,0.0,0.5,0.0,0.0,5.8e-05,1.230344e-06,0.030296,24624
fourt,0.0,0.3,0.2,0.002212,0.003346,0.002646333,0.007939,3
prebuild_gwn,0.0,0.0,0.0,0.000178,0.000178,0.000178,0.000178,1
build_real,0.0,0.0,0.0,0.000151,0.000151,0.000151,0.000151,1


## N = 64

In [26]:
# Per-function summary for the N = 64 run, parsed from log_64.txt (relative path).
analyze('log_64.txt')

Unnamed: 0_level_0,memory,memory,memory,time,time,time,time,time
Unnamed: 0_level_1,min,max,median,min,max,mean,sum,count
function,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Py_kgeneration,233.5,233.5,233.5,30.35427,30.35427,30.35427,30.35427,1
generate,168.4,168.4,168.4,21.739521,21.739521,21.739521,21.739521,1
gasdev,-2.9,4.4,0.0,9e-06,0.000742,6.1e-05,16.076987,262144
fftma2,68.0,68.0,68.0,8.613854,8.613854,8.613854,8.613854,1
covariance,63.8,63.8,63.8,8.412335,8.412335,8.412335,8.412335,1
ran2,-0.6,0.8,0.0,7e-06,0.000281,1.4e-05,4.680977,333450
cov_value,-0.7,1.0,0.0,1.8e-05,0.00032,3e-05,4.646094,156816
fourt,0.3,1.7,0.4,0.05183,0.078516,0.064563,0.193689,3
prebuild_gwn,2.5,2.5,2.5,0.002939,0.002939,0.002939,0.002939,1
build_real,-0.3,-0.3,-0.3,0.002089,0.002089,0.002089,0.002089,1


## N = 128

In [29]:
# Per-function summary for the N = 128 run, parsed from log_128.txt (relative path).
analyze('log_128.txt')

5897863


Unnamed: 0_level_0,memory,memory,memory,time,time,time,time,time
Unnamed: 0_level_1,min,max,median,min,max,mean,sum,count
function,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Py_kgeneration,1864.7,1864.7,1864.7,126.767549,126.767549,126.767549,126.767549,1
generate,1759.0,1759.0,1759.0,97.731527,97.731527,97.731527,97.731527,1
gasdev,-134.6,18.5,0.0,0.0,0.000772,2.7e-05,57.591066,2097152
fftma2,129.3,129.3,129.3,29.034783,29.034783,29.034783,29.034783,1
covariance,93.1,93.1,93.1,27.493769,27.493769,27.493769,27.493769,1
ran2,-4.8,2.2,0.0,0.0,6.5e-05,2e-06,4.661066,2668394
cov_value,-0.6,0.7,0.0,1e-06,0.000281,2e-06,2.443765,1132300
fourt,-5.2,18.7,0.5,0.390409,0.591145,0.499473,1.498418,3
build_real,0.0,0.0,0.0,0.017085,0.017085,0.017085,0.017085,1
prebuild_gwn,17.0,17.0,17.0,0.014117,0.014117,0.014117,0.014117,1


## N = 256