Add CPU analysis

4 years ago · 96bcbf2c95
parent 036d958eb8
commit 96bcbf2c95
1 changed files with 166 additions and 15 deletions
--- a/fftma_module/gen/analysis.ipynb
+++ b/fftma_module/gen/analysis.ipynb
@ -9,7 +9,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 101,
   "metadata": {},
   "outputs": [],
   "source": [
@ -24,7 +24,7 @@
   "source": [
    "## Armado del dataset\n",
    "\n",
-    "En este paso parsearemos los archivos para obtener estadísticas sobre el tiempo que tarda cada ejecución de una función, sobre la memoria usada, el uso de CPU (TODO). Con esto buscamos identificar:\n",
+    "En este paso parsearemos los archivos para obtener estadísticas sobre el tiempo que tarda cada ejecución de una función, sobre la memoria usada y sobre el uso de CPU. Con esto buscamos identificar:\n",
    "- Qué funciones son las que consumen mayor cantidad de memoria\n",
    "- Qué funciones son las que tienen un mayor tiempo de procesamiento\n",
    "- Qué funciones son las que son invocadas una mayor cantidad de veces\n",
@ -34,7 +34,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 108,
   "metadata": {},
   "outputs": [],
   "source": [
@ -44,7 +44,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 109,
   "metadata": {},
   "outputs": [],
   "source": [
@ -58,50 +58,61 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 158,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_data(file_name):\n",
    "    \"\"\"Parse a profiling log into one record per timed function call.\n",
    "\n",
    "    Lines containing a ``CPU`` token add a ``CPU_<id>`` entry to the record\n",
    "    being built: the id is the token after ``CPU`` (text before ``:``) and\n",
    "    the value is the next token with any ``%`` suffix removed. A line that\n",
    "    contains both ``USED`` and ``ELAPSED`` completes the record with its\n",
    "    ``function``, ``memory`` and ``time`` fields and stores it. Any other\n",
    "    line is ignored.\n",
    "\n",
    "    Args:\n",
    "        file_name: Path of the log file to read.\n",
    "\n",
    "    Returns:\n",
    "        A list of dicts, one per completed record.\n",
    "    \"\"\"\n",
    "    data = []\n",
    "    row = {}\n",
    "\n",
    "    with open(file_name) as log_file:\n",
    "        for line in log_file:\n",
    "            split_line = line.split()\n",
    "\n",
    "            if \"CPU\" in split_line:\n",
    "                # Accumulate into the current row; it is only reset after a\n",
    "                # USED/ELAPSED line has appended it, so CPU values are kept.\n",
    "                idx_cpu = split_line.index(\"CPU\") + 1\n",
    "                idx_per = idx_cpu + 1\n",
    "                cpu_id = split_line[idx_cpu].rsplit(':')[0]\n",
    "                row[\"CPU_{}\".format(cpu_id)] = float(split_line[idx_per].rsplit(\"%\")[0])\n",
    "                continue\n",
    "\n",
    "            # Both markers are indexed below, so skip lines missing either one\n",
    "            # instead of letting .index() raise ValueError.\n",
    "            if \"USED\" not in split_line or \"ELAPSED\" not in split_line:\n",
    "                continue\n",
    "\n",
    "            idx_used_mem = split_line.index(\"USED\") + 4\n",
    "            idx_elapsed = split_line.index(\"ELAPSED\") + 2\n",
    "\n",
    "            row[\"function\"] = get_function_name(split_line[2])\n",
    "            row[\"memory\"] = float(split_line[idx_used_mem])\n",
    "            row[\"time\"] = float(split_line[idx_elapsed].rsplit(\",\")[0])\n",
    "            data.append(row)\n",
    "            row = {}\n",
    "\n",
    "    return data"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 156,
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_df(file_name):\n",
    "    \"\"\"Build per-function summary statistics from a profiling log.\n",
    "\n",
    "    Args:\n",
    "        file_name: Path of the log file, passed on to ``get_data``.\n",
    "\n",
    "    Returns:\n",
    "        A DataFrame grouped by ``function`` with min/max/mean/sum/count of\n",
    "        ``time``, min/max/median of ``memory`` and the mean of every\n",
    "        ``CPU_<id>`` column present in the parsed data.\n",
    "    \"\"\"\n",
    "    data = get_data(file_name)\n",
    "    df = pd.DataFrame(data)\n",
    "    aggregations = {\n",
    "        'time': ['min', 'max', 'mean', 'sum', 'count'],\n",
    "        'memory': ['min', 'max', 'median'],\n",
    "    }\n",
    "    # Aggregate the CPU columns the log actually produced rather than assuming\n",
    "    # exactly CPU_0..CPU_7, which raises KeyError for other core counts.\n",
    "    cpu_columns = [name for name in df.columns if str(name).startswith('CPU_')]\n",
    "    # Sorting by (length, name) keeps CPU_2 ahead of CPU_10.\n",
    "    for name in sorted(cpu_columns, key=lambda col: (len(col), col)):\n",
    "        aggregations[name] = ['mean']\n",
    "    return df.groupby(['function']).agg(aggregations)"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 150,
   "metadata": {},
   "outputs": [],
   "source": [
@ -112,7 +123,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 151,
   "metadata": {},
   "outputs": [],
   "source": [
@ -170,19 +181,159 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 157,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "1931\n"
+      "{'function': 'generate', 'time': 0.011741, 'memory': 0.5, 'CPU_2': 0.1, 'CPU_3': 100.1, 'CPU_0': 44.544444, 'CPU_1': 50.1, 'CPU_6': 0.1, 'CPU_7': 100.1, 'CPU_4': 0.1, 'CPU_5': 0.1}\n",
      "{'function': 'Py_kgeneration', 'time': 0.018899, 'memory': 0.5, 'CPU_2': 0.1, 'CPU_3': 50.1, 'CPU_0': 41.276471, 'CPU_1': 33.433333, 'CPU_6': 0.1, 'CPU_7': 100.1, 'CPU_4': 0.1, 'CPU_5': 0.1}\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>CPU_1</th>\n",
       "      <th colspan=\"5\" halign=\"left\">time</th>\n",
       "      <th>CPU_2</th>\n",
       "      <th>CPU_3</th>\n",
       "      <th>CPU_0</th>\n",
       "      <th colspan=\"3\" halign=\"left\">memory</th>\n",
       "      <th>CPU_6</th>\n",
       "      <th>CPU_7</th>\n",
       "      <th>CPU_4</th>\n",
       "      <th>CPU_5</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>mean</th>\n",
       "      <th>min</th>\n",
       "      <th>max</th>\n",
       "      <th>mean</th>\n",
       "      <th>sum</th>\n",
       "      <th>count</th>\n",
       "      <th>mean</th>\n",
       "      <th>mean</th>\n",
       "      <th>mean</th>\n",
       "      <th>min</th>\n",
       "      <th>max</th>\n",
       "      <th>median</th>\n",
       "      <th>mean</th>\n",
       "      <th>mean</th>\n",
       "      <th>mean</th>\n",
       "      <th>mean</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>function</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Py_kgeneration</th>\n",
       "      <td>33.433333</td>\n",
       "      <td>0.018899</td>\n",
       "      <td>0.018899</td>\n",
       "      <td>0.018899</td>\n",
       "      <td>0.018899</td>\n",
       "      <td>1</td>\n",
       "      <td>0.1</td>\n",
       "      <td>50.1</td>\n",
       "      <td>41.276471</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.1</td>\n",
       "      <td>100.1</td>\n",
       "      <td>0.1</td>\n",
       "      <td>0.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>generate</th>\n",
       "      <td>50.100000</td>\n",
       "      <td>0.011741</td>\n",
       "      <td>0.011741</td>\n",
       "      <td>0.011741</td>\n",
       "      <td>0.011741</td>\n",
       "      <td>1</td>\n",
       "      <td>0.1</td>\n",
       "      <td>100.1</td>\n",
       "      <td>44.544444</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.1</td>\n",
       "      <td>100.1</td>\n",
       "      <td>0.1</td>\n",
       "      <td>0.1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                    CPU_1      time                                     CPU_2  \\\n",
       "                     mean       min       max      mean       sum count  mean   \n",
       "function                                                                        \n",
       "Py_kgeneration  33.433333  0.018899  0.018899  0.018899  0.018899     1   0.1   \n",
       "generate        50.100000  0.011741  0.011741  0.011741  0.011741     1   0.1   \n",
       "\n",
       "                CPU_3      CPU_0 memory             CPU_6  CPU_7 CPU_4 CPU_5  \n",
       "                 mean       mean    min  max median  mean   mean  mean  mean  \n",
       "function                                                                      \n",
       "Py_kgeneration   50.1  41.276471    0.5  0.5    0.5   0.1  100.1   0.1   0.1  \n",
       "generate        100.1  44.544444    0.5  0.5    0.5   0.1  100.1   0.1   0.1  "
      ]
     },
     "execution_count": 157,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
-    "df = analyze('log_8-aa')"
+    "df = analyze('log_8-aa')\n",
    "\n",
    "\n",
    "df"
   ]
  },
  {