Add CPU analysis

milestone_5_without_improvements-logs
chortas 3 years ago
parent 036d958eb8
commit 96bcbf2c95

@ -9,7 +9,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 101,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -24,7 +24,7 @@
"source": [ "source": [
"## Armado del dataset\n", "## Armado del dataset\n",
"\n", "\n",
"En este paso parsearemos los archivos para obtener estadísticas sobre el tiempo que tarda cada ejecución de una función, sobre la memoria usada, el uso de CPU (TODO). Con esto buscamos identificar:\n", "En este paso parsearemos los archivos para obtener estadísticas sobre el tiempo que tarda cada ejecución de una función, sobre la memoria usada, el uso de CPU. Con esto buscamos identificar:\n",
"- Qué funciones son las que consumen mayor cantidad de memoria\n", "- Qué funciones son las que consumen mayor cantidad de memoria\n",
"- Qué funciones son las que tienen un mayor tiempo de procesamiento\n", "- Qué funciones son las que tienen un mayor tiempo de procesamiento\n",
"- Qué funciones son las que son invocadas una mayor cantidad de veces\n", "- Qué funciones son las que son invocadas una mayor cantidad de veces\n",
@ -34,7 +34,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 108,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -44,7 +44,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": 109,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -58,50 +58,61 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 5, "execution_count": 158,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"def get_data(file_name):\n", "def get_data(file_name):\n",
" data = []\n", " data = []\n",
" row = {}\n",
"\n", "\n",
" with open(file_name) as log_file:\n", " with open(file_name) as log_file:\n",
" lines = log_file.readlines()\n", " lines = log_file.readlines()\n",
" for line in lines:\n", " for line in lines:\n",
" row = {}\n",
" split_line = line.split()\n", " split_line = line.split()\n",
" if \"USED\" not in split_line or \"ELAPSED\" not in split_line: continue\n", " \n",
" if \"USED\" not in split_line and \"ELAPSED\" not in split_line and \"CPU\" not in split_line: continue\n",
" \n",
" if \"CPU\" in split_line:\n",
" idx_cpu = split_line.index(\"CPU\") + 1\n",
" idx_per = idx_cpu + 1\n",
" row[\"CPU_{}\".format(split_line[idx_cpu].rsplit(':')[0])] = float(split_line[idx_per].rsplit(\"%\")[0])\n",
" continue\n",
" \n",
" \n",
" idx_used_mem = split_line.index(\"USED\") + 4\n", " idx_used_mem = split_line.index(\"USED\") + 4\n",
" idx_elapsed = split_line.index(\"ELAPSED\") + 2\n", " idx_elapsed = split_line.index(\"ELAPSED\") + 2\n",
" \n", " \n",
" function_name = get_function_name(split_line[2])\n", " function_name = get_function_name(split_line[2])\n",
" \n",
" used_virtual_mem = float(split_line[idx_used_mem])\n", " used_virtual_mem = float(split_line[idx_used_mem])\n",
" elapsed = float(split_line[idx_elapsed].rsplit(\",\")[0])\n", " elapsed = float(split_line[idx_elapsed].rsplit(\",\")[0])\n",
"\n", "\n",
" # TODO: add CPU\n",
" row[\"function\"] = function_name\n", " row[\"function\"] = function_name\n",
" row[\"memory\"] = used_virtual_mem \n", " row[\"memory\"] = used_virtual_mem \n",
" row[\"time\"] = elapsed\n", " row[\"time\"] = elapsed\n",
" print(row)\n",
" data.append(row)\n", " data.append(row)\n",
" row = {}\n",
" \n", " \n",
" return data" " return data"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": 156,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"def create_df(file_name):\n", "def create_df(file_name):\n",
" data = get_data(file_name)\n", " data = get_data(file_name)\n",
" df = pd.DataFrame(data)\n", " df = pd.DataFrame(data)\n",
" return df.groupby(['function']).agg({'time': ['min', 'max', 'mean', 'sum', 'count'], 'memory': ['min', 'max', 'median']})" " return df.groupby(['function']).agg({'time': ['min', 'max', 'mean', 'sum', 'count'], 'memory': ['min', 'max', 'median'], 'CPU_0': ['mean'], 'CPU_1': ['mean'], 'CPU_2': ['mean'], 'CPU_3': ['mean'], 'CPU_4': ['mean'], 'CPU_5': ['mean'], 'CPU_6': ['mean'], 'CPU_7': ['mean']})"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 7, "execution_count": 150,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -112,7 +123,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 8, "execution_count": 151,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -170,19 +181,159 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 20, "execution_count": 157,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"1931\n" "{'function': 'generate', 'time': 0.011741, 'memory': 0.5, 'CPU_2': 0.1, 'CPU_3': 100.1, 'CPU_0': 44.544444, 'CPU_1': 50.1, 'CPU_6': 0.1, 'CPU_7': 100.1, 'CPU_4': 0.1, 'CPU_5': 0.1}\n",
"{'function': 'Py_kgeneration', 'time': 0.018899, 'memory': 0.5, 'CPU_2': 0.1, 'CPU_3': 50.1, 'CPU_0': 41.276471, 'CPU_1': 33.433333, 'CPU_6': 0.1, 'CPU_7': 100.1, 'CPU_4': 0.1, 'CPU_5': 0.1}\n"
] ]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead tr th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe thead tr:last-of-type th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th>CPU_1</th>\n",
" <th colspan=\"5\" halign=\"left\">time</th>\n",
" <th>CPU_2</th>\n",
" <th>CPU_3</th>\n",
" <th>CPU_0</th>\n",
" <th colspan=\"3\" halign=\"left\">memory</th>\n",
" <th>CPU_6</th>\n",
" <th>CPU_7</th>\n",
" <th>CPU_4</th>\n",
" <th>CPU_5</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th>mean</th>\n",
" <th>min</th>\n",
" <th>max</th>\n",
" <th>mean</th>\n",
" <th>sum</th>\n",
" <th>count</th>\n",
" <th>mean</th>\n",
" <th>mean</th>\n",
" <th>mean</th>\n",
" <th>min</th>\n",
" <th>max</th>\n",
" <th>median</th>\n",
" <th>mean</th>\n",
" <th>mean</th>\n",
" <th>mean</th>\n",
" <th>mean</th>\n",
" </tr>\n",
" <tr>\n",
" <th>function</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Py_kgeneration</th>\n",
" <td>33.433333</td>\n",
" <td>0.018899</td>\n",
" <td>0.018899</td>\n",
" <td>0.018899</td>\n",
" <td>0.018899</td>\n",
" <td>1</td>\n",
" <td>0.1</td>\n",
" <td>50.1</td>\n",
" <td>41.276471</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.1</td>\n",
" <td>100.1</td>\n",
" <td>0.1</td>\n",
" <td>0.1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>generate</th>\n",
" <td>50.100000</td>\n",
" <td>0.011741</td>\n",
" <td>0.011741</td>\n",
" <td>0.011741</td>\n",
" <td>0.011741</td>\n",
" <td>1</td>\n",
" <td>0.1</td>\n",
" <td>100.1</td>\n",
" <td>44.544444</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.5</td>\n",
" <td>0.1</td>\n",
" <td>100.1</td>\n",
" <td>0.1</td>\n",
" <td>0.1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" CPU_1 time CPU_2 \\\n",
" mean min max mean sum count mean \n",
"function \n",
"Py_kgeneration 33.433333 0.018899 0.018899 0.018899 0.018899 1 0.1 \n",
"generate 50.100000 0.011741 0.011741 0.011741 0.011741 1 0.1 \n",
"\n",
" CPU_3 CPU_0 memory CPU_6 CPU_7 CPU_4 CPU_5 \n",
" mean mean min max median mean mean mean mean \n",
"function \n",
"Py_kgeneration 50.1 41.276471 0.5 0.5 0.5 0.1 100.1 0.1 0.1 \n",
"generate 100.1 44.544444 0.5 0.5 0.5 0.1 100.1 0.1 0.1 "
]
},
"execution_count": 157,
"metadata": {},
"output_type": "execute_result"
} }
], ],
"source": [ "source": [
"df = analyze('log_8-aa')" "df = analyze('log_8-aa')\n",
"\n",
"\n",
"df"
] ]
}, },
{ {

Loading…
Cancel
Save