{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "58b4cc1c-dfb8-4459-b1c8-2629f3471b24",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The autoreload extension is already loaded. To reload it, use:\n",
      "  %reload_ext autoreload\n",
      "The lab_black extension is already loaded. To reload it, use:\n",
      "  %reload_ext lab_black\n",
      "\n",
      "---------------------------------\n",
      "Working on the host: Joachims-MacBook-Pro.local\n",
      "\n",
      "---------------------------------\n",
      "Python version: 3.10.2 | packaged by conda-forge | (main, Feb  1 2022, 19:30:18) [Clang 11.1.0 ]\n",
      "\n",
      "---------------------------------\n",
      "Python interpreter: /opt/miniconda3/envs/srh/bin/python\n"
     ]
    }
   ],
   "source": [
    "%matplotlib inline\n",
    "# Load the \"autoreload\" extension\n",
    "%load_ext autoreload\n",
    "# always reload modules\n",
    "%autoreload 2\n",
    "# black formatter for jupyter notebooks\n",
    "#%load_ext nb_black\n",
    "# black formatter for jupyter lab\n",
    "%load_ext lab_black\n",
    "\n",
    "%run ../../../src/notebook_env.py"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a2e0e45e-1b51-49a2-b2bd-8f303ab1232e",
   "metadata": {},
   "source": [
    "# Pandas Dataframes"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d114516d-ebe9-4cac-8bb4-bea3f7af83b2",
   "metadata": {},
   "source": [
    "In dem folgenden Lehrbeispiel diskutieren wir ein paar wenige grundsätzliche Methoden für den Umgang mit Dataframes."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "056dae9b-fe73-4539-a3b7-03bf819b235d",
   "metadata": {},
   "source": [
    "> ## Erstellen Sie einen Pandas Datenframe mit den Spalten `numbers`, `colors` und `frequency` und den jeweilig dazugehörenden Werten `[1, 2, 3]`, `[red, white, blue]` und `[220, 440, 880]`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "13d8d2cc-c142-4c05-b133-da8b68a75ac0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>numbers</th>\n",
       "      <th>colors</th>\n",
       "      <th>frequency</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>red</td>\n",
       "      <td>220</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>white</td>\n",
       "      <td>440</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>blue</td>\n",
       "      <td>880</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   numbers colors  frequency\n",
       "0        1    red        220\n",
       "1        2  white        440\n",
       "2        3   blue        880"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "df = pd.DataFrame(\n",
    "    {\n",
    "        \"numbers\": [1, 2, 3],\n",
    "        \"colors\": [\"red\", \"white\", \"blue\"],\n",
    "        \"frequency\": [220, 440, 880],\n",
    "    }\n",
    ")\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0207fbe4-a314-46dd-9847-99bedbca25c7",
   "metadata": {},
   "source": [
    "2. Wir können auch alternativ einen leeren Dataframe erstellen und die Daten Spaltenweise hinzufügen."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "67723ed7-c549-4969-9533-a4cd6683faf2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>numbers</th>\n",
       "      <th>colors</th>\n",
       "      <th>frequency</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>red</td>\n",
       "      <td>220</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>white</td>\n",
       "      <td>440</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>blue</td>\n",
       "      <td>880</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   numbers colors  frequency\n",
       "0        1    red        220\n",
       "1        2  white        440\n",
       "2        3   blue        880"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame()\n",
    "df[\"numbers\"] = [1, 2, 3]\n",
    "df[\"colors\"] = [\"red\", \"white\", \"blue\"]\n",
    "df[\"frequency\"] = [220, 440, 880]\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4746b756-d5fb-48aa-88c0-e710b131e23d",
   "metadata": {},
   "source": [
    "3. Die erste Spalte wird Index genannt. Wir können mit der Methode `loc[Index]` Zeilenweise Elemente auswählen."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "643ad3d8-7e8d-40ff-8ac6-ebcd876bcfc1",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Erste Zeile von df\n",
      "numbers        1\n",
      "colors       red\n",
      "frequency    220\n",
      "Name: 0, dtype: object\n",
      "Zweite Zeile von df\n",
      "numbers          2\n",
      "colors       white\n",
      "frequency      440\n",
      "Name: 1, dtype: object\n",
      "Dritte Zeile von df\n",
      "numbers         3\n",
      "colors       blue\n",
      "frequency     880\n",
      "Name: 2, dtype: object\n"
     ]
    }
   ],
   "source": [
    "print('Erste Zeile von df')\n",
    "print(df.loc[0])\n",
    "print('Zweite Zeile von df')\n",
    "print(df.loc[1])\n",
    "print('Dritte Zeile von df')\n",
    "print(df.loc[2])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "466c4806-c58b-48ff-8c6b-7afb6a3bde2e",
   "metadata": {},
   "source": [
    "4. In ähnlicher Weise können wir `loc()` verwenden, wenn wir Spalten anhand ihrer Namen auswählen wollen"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "98f86f2e-5d14-4356-ba7f-20081a4dcb82",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   numbers  frequency\n",
      "0        1        220\n",
      "1        2        440\n",
      "2        3        880\n"
     ]
    }
   ],
   "source": [
    "print(df.loc[:,['numbers','frequency']])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "57340171-7691-46eb-920d-c8d21b4d90b1",
   "metadata": {},
   "source": [
    "5. Wir können auch die Methoden `iloc()` und `loc()` verwenden, um mehrere Spalten auszuwählen.\n",
    "    Wenn wir die Spaltenindizes verwenden wollen, um sie zu extrahieren, können wir `iloc()` verwenden, wie im folgenden Beispiel gezeigt:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "654576fd-5d1b-4da3-9de3-ae501f53769c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   numbers  frequency\n",
      "0        1        220\n"
     ]
    }
   ],
   "source": [
    "print(df.iloc[[0],[0,2]])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c9d661a8-9d04-42ae-acb0-15920fef950c",
   "metadata": {},
   "source": [
    "5. Man kann so auch einzelne Elemente auswählen."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "58e20a23-0c57-4f81-95b7-801deb831139",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   numbers\n",
      "2        3\n"
     ]
    }
   ],
   "source": [
    "print(df.iloc[[2],[0]])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cb92ebd5-e343-4ebc-a1ea-6af28a682f1b",
   "metadata": {},
   "source": [
    "6. ... oder überschreiben."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "id": "598c2954-bfc9-423d-bcf8-a6e5d9374a17",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   numbers\n",
      "2        5\n"
     ]
    }
   ],
   "source": [
    "df.iloc[[2],[0]] = 5\n",
    "print(df.iloc[[2],[0]])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5168b5c9-d6bf-4bcc-a5e2-d23499b988a0",
   "metadata": {},
   "source": [
    "7. Es ist auch möglich mit `loc()` Daten zu Filtern indem wir logische Verknüpfungen verwenden"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "id": "1cf23ff4-34cd-47ea-be4d-e67c58095fb7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>numbers</th>\n",
       "      <th>colors</th>\n",
       "      <th>frequency</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>red</td>\n",
       "      <td>220</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>white</td>\n",
       "      <td>440</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   numbers colors  frequency\n",
       "0        1    red        220\n",
       "1        2  white        440"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df2 = df.loc[df['frequency'] < 441]\n",
    "df2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "id": "bf8c8e2f-35a6-453e-8030-4cd12af110bc",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>numbers</th>\n",
       "      <th>colors</th>\n",
       "      <th>frequency</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>red</td>\n",
       "      <td>220</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   numbers colors  frequency\n",
       "0        1    red        220"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df2 = df.loc[df['colors'] == 'red']\n",
    "df2"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "acb2d051-8260-49df-ab4a-3a3dd866a677",
   "metadata": {},
   "source": [
    "8. Man kann logische Verknüpfungen auch kombinieren um spezifischer zu filtern"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "id": "405db2d9-3847-4bbb-be2e-8ccd518d8349",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>numbers</th>\n",
       "      <th>colors</th>\n",
       "      <th>frequency</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>red</td>\n",
       "      <td>220</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>white</td>\n",
       "      <td>440</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   numbers colors  frequency\n",
       "0        1    red        220\n",
       "1        2  white        440"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df2 = df.loc[(df['numbers'] >= 1) & (df['frequency'] < 441)]\n",
    "df2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "78e9b627-c3e1-4ec8-807e-df5de19a34e4",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}