def get_data(file_name):
    """Parse a profiling log into one record per timed function call.

    Two kinds of whitespace-separated log lines are recognised:

    * CPU lines: contain the token ``CPU`` followed by a core id ending in
      ``:`` and a usage value ending in ``%``. The value is stored under the
      key ``CPU_<core id>`` of the record currently being assembled.
    * Function lines: contain both ``USED`` and ``ELAPSED``. The third token
      is passed to ``get_function_name``, the memory figure is the fourth
      token after ``USED`` and the elapsed time is the second token after
      ``ELAPSED`` (with a trailing comma stripped). Such a line completes the
      current record.

    Every other line is ignored. CPU readings seen before a function line are
    attached to that function's record.

    Args:
        file_name: path of the log file to read.

    Returns:
        A list of dicts with the keys ``function``, ``memory``, ``time`` and
        one ``CPU_<id>`` key per CPU reading that preceded the function line.

    Raises:
        ValueError: if a numeric field of a recognised line cannot be parsed.
        IndexError: if a recognised line is truncated.
    """
    data = []
    row = {}

    with open(file_name) as log_file:
        # Iterate lazily instead of loading the whole log with readlines().
        for line in log_file:
            split_line = line.split()

            if "CPU" in split_line:
                idx_cpu = split_line.index("CPU") + 1
                idx_per = idx_cpu + 1
                core_id = split_line[idx_cpu].rsplit(':')[0]
                row["CPU_{}".format(core_id)] = float(split_line[idx_per].rsplit("%")[0])
                continue

            # A function line needs BOTH markers; a line carrying only one of
            # them would make the .index() lookups below raise ValueError.
            if "USED" not in split_line or "ELAPSED" not in split_line:
                continue

            idx_used_mem = split_line.index("USED") + 4
            idx_elapsed = split_line.index("ELAPSED") + 2

            row["function"] = get_function_name(split_line[2])
            row["memory"] = float(split_line[idx_used_mem])
            row["time"] = float(split_line[idx_elapsed].rsplit(",")[0])

            data.append(row)
            # Start a fresh record so CPU readings do not leak between calls.
            row = {}

    return data


def _cpu_sort_key(column):
    """Order ``CPU_<n>`` column names numerically, non-numeric suffixes last."""
    suffix = column[len("CPU_"):]
    if suffix.isdigit():
        return (0, int(suffix), "")
    return (1, 0, suffix)


def create_df(file_name):
    """Aggregate the parsed log of ``file_name`` per function.

    Args:
        file_name: path of the log file, forwarded to ``get_data``.

    Returns:
        A DataFrame indexed by ``function`` with two-level columns:
        ``time`` (min, max, mean, sum, count), ``memory`` (min, max, median)
        and the mean of every ``CPU_<id>`` column present in the parsed data.
    """
    data = get_data(file_name)
    df = pd.DataFrame(data)

    aggregations = {
        'time': ['min', 'max', 'mean', 'sum', 'count'],
        'memory': ['min', 'max', 'median'],
    }
    # Discover the CPU columns instead of assuming exactly eight cores, so logs
    # from machines with a different core count aggregate without a KeyError.
    cpu_columns = sorted(
        (column for column in df.columns if column.startswith("CPU_")),
        key=_cpu_sort_key,
    )
    for column in cpu_columns:
        aggregations[column] = ['mean']

    return df.groupby(['function']).agg(aggregations)
'CPU_2': 0.1, 'CPU_3': 50.1, 'CPU_0': 41.276471, 'CPU_1': 33.433333, 'CPU_6': 0.1, 'CPU_7': 100.1, 'CPU_4': 0.1, 'CPU_5': 0.1}\n" ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CPU_1timeCPU_2CPU_3CPU_0memoryCPU_6CPU_7CPU_4CPU_5
meanminmaxmeansumcountmeanmeanmeanminmaxmedianmeanmeanmeanmean
function
Py_kgeneration33.4333330.0188990.0188990.0188990.01889910.150.141.2764710.50.50.50.1100.10.10.1
generate50.1000000.0117410.0117410.0117410.01174110.1100.144.5444440.50.50.50.1100.10.10.1
\n", + "
" + ], + "text/plain": [ + " CPU_1 time CPU_2 \\\n", + " mean min max mean sum count mean \n", + "function \n", + "Py_kgeneration 33.433333 0.018899 0.018899 0.018899 0.018899 1 0.1 \n", + "generate 50.100000 0.011741 0.011741 0.011741 0.011741 1 0.1 \n", + "\n", + " CPU_3 CPU_0 memory CPU_6 CPU_7 CPU_4 CPU_5 \n", + " mean mean min max median mean mean mean mean \n", + "function \n", + "Py_kgeneration 50.1 41.276471 0.5 0.5 0.5 0.1 100.1 0.1 0.1 \n", + "generate 100.1 44.544444 0.5 0.5 0.5 0.1 100.1 0.1 0.1 " + ] + }, + "execution_count": 157, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "df = analyze('log_8-aa')" + "df = analyze('log_8-aa')\n", + "\n", + "\n", + "df" ] }, {