{
"cells": [
{
"cell_type": "markdown",
"id": "f5ff7131",
"metadata": {},
"source": [
"# pandas: Create DataFrame"
]
},
{
"cell_type": "markdown",
"id": "1236cf26",
"metadata": {},
"source": [
"## Package Import"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "4d08380d",
"metadata": {
"execution": {
"iopub.execute_input": "2025-09-02T18:59:30.443023Z",
"iopub.status.busy": "2025-09-02T18:59:30.442784Z",
"iopub.status.idle": "2025-09-02T18:59:30.661256Z",
"shell.execute_reply": "2025-09-02T18:59:30.660733Z"
},
"tags": [
"hide-output",
"scroll-output"
]
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "markdown",
"id": "0637d85b",
"metadata": {},
"source": [
"## Dataset Import"
]
},
{
"cell_type": "markdown",
"id": "a7b67820",
"metadata": {},
"source": [
"The dataset used in this notebook is from [Kaggle - Pokemon](https://www.kaggle.com/datasets/abcsds/pokemon)."
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "3332eedd",
"metadata": {
"execution": {
"iopub.execute_input": "2025-09-02T18:59:30.662883Z",
"iopub.status.busy": "2025-09-02T18:59:30.662738Z",
"iopub.status.idle": "2025-09-02T18:59:30.667277Z",
"shell.execute_reply": "2025-09-02T18:59:30.666711Z"
},
"tags": [
"hide-output",
"scroll-output"
]
},
"outputs": [],
"source": [
"data = pd.read_csv('data/Pokemon.csv')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "0b6ef7ad",
"metadata": {
"execution": {
"iopub.execute_input": "2025-09-02T18:59:30.668426Z",
"iopub.status.busy": "2025-09-02T18:59:30.668308Z",
"iopub.status.idle": "2025-09-02T18:59:30.677097Z",
"shell.execute_reply": "2025-09-02T18:59:30.676363Z"
},
"tags": [
"hide-output",
"scroll-output"
]
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" # | \n",
" Name | \n",
" Type 1 | \n",
" Type 2 | \n",
" Total | \n",
" HP | \n",
" Attack | \n",
" Defense | \n",
" Sp. Atk | \n",
" Sp. Def | \n",
" Speed | \n",
" Generation | \n",
" Legendary | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" Bulbasaur | \n",
" Grass | \n",
" Poison | \n",
" 318 | \n",
" 45 | \n",
" 49 | \n",
" 49 | \n",
" 65 | \n",
" 65 | \n",
" 45 | \n",
" 1 | \n",
" False | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" Ivysaur | \n",
" Grass | \n",
" Poison | \n",
" 405 | \n",
" 60 | \n",
" 62 | \n",
" 63 | \n",
" 80 | \n",
" 80 | \n",
" 60 | \n",
" 1 | \n",
" False | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" Venusaur | \n",
" Grass | \n",
" Poison | \n",
" 525 | \n",
" 80 | \n",
" 82 | \n",
" 83 | \n",
" 100 | \n",
" 100 | \n",
" 80 | \n",
" 1 | \n",
" False | \n",
"
\n",
" \n",
" 3 | \n",
" 3 | \n",
" VenusaurMega Venusaur | \n",
" Grass | \n",
" Poison | \n",
" 625 | \n",
" 80 | \n",
" 100 | \n",
" 123 | \n",
" 122 | \n",
" 120 | \n",
" 80 | \n",
" 1 | \n",
" False | \n",
"
\n",
" \n",
" 4 | \n",
" 4 | \n",
" Charmander | \n",
" Fire | \n",
" NaN | \n",
" 309 | \n",
" 39 | \n",
" 52 | \n",
" 43 | \n",
" 60 | \n",
" 50 | \n",
" 65 | \n",
" 1 | \n",
" False | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 795 | \n",
" 719 | \n",
" Diancie | \n",
" Rock | \n",
" Fairy | \n",
" 600 | \n",
" 50 | \n",
" 100 | \n",
" 150 | \n",
" 100 | \n",
" 150 | \n",
" 50 | \n",
" 6 | \n",
" True | \n",
"
\n",
" \n",
" 796 | \n",
" 719 | \n",
" DiancieMega Diancie | \n",
" Rock | \n",
" Fairy | \n",
" 700 | \n",
" 50 | \n",
" 160 | \n",
" 110 | \n",
" 160 | \n",
" 110 | \n",
" 110 | \n",
" 6 | \n",
" True | \n",
"
\n",
" \n",
" 797 | \n",
" 720 | \n",
" HoopaHoopa Confined | \n",
" Psychic | \n",
" Ghost | \n",
" 600 | \n",
" 80 | \n",
" 110 | \n",
" 60 | \n",
" 150 | \n",
" 130 | \n",
" 70 | \n",
" 6 | \n",
" True | \n",
"
\n",
" \n",
" 798 | \n",
" 720 | \n",
" HoopaHoopa Unbound | \n",
" Psychic | \n",
" Dark | \n",
" 680 | \n",
" 80 | \n",
" 160 | \n",
" 60 | \n",
" 170 | \n",
" 130 | \n",
" 80 | \n",
" 6 | \n",
" True | \n",
"
\n",
" \n",
" 799 | \n",
" 721 | \n",
" Volcanion | \n",
" Fire | \n",
" Water | \n",
" 600 | \n",
" 80 | \n",
" 110 | \n",
" 120 | \n",
" 130 | \n",
" 90 | \n",
" 70 | \n",
" 6 | \n",
" True | \n",
"
\n",
" \n",
"
\n",
"
800 rows × 13 columns
\n",
"
"
],
"text/plain": [
" # Name Type 1 Type 2 Total HP Attack Defense \\\n",
"0 1 Bulbasaur Grass Poison 318 45 49 49 \n",
"1 2 Ivysaur Grass Poison 405 60 62 63 \n",
"2 3 Venusaur Grass Poison 525 80 82 83 \n",
"3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 \n",
"4 4 Charmander Fire NaN 309 39 52 43 \n",
".. ... ... ... ... ... .. ... ... \n",
"795 719 Diancie Rock Fairy 600 50 100 150 \n",
"796 719 DiancieMega Diancie Rock Fairy 700 50 160 110 \n",
"797 720 HoopaHoopa Confined Psychic Ghost 600 80 110 60 \n",
"798 720 HoopaHoopa Unbound Psychic Dark 680 80 160 60 \n",
"799 721 Volcanion Fire Water 600 80 110 120 \n",
"\n",
" Sp. Atk Sp. Def Speed Generation Legendary \n",
"0 65 65 45 1 False \n",
"1 80 80 60 1 False \n",
"2 100 100 80 1 False \n",
"3 122 120 80 1 False \n",
"4 60 50 65 1 False \n",
".. ... ... ... ... ... \n",
"795 100 150 50 6 True \n",
"796 160 110 110 6 True \n",
"797 150 130 70 6 True \n",
"798 170 130 80 6 True \n",
"799 130 90 70 6 True \n",
"\n",
"[800 rows x 13 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data"
]
},
{
"cell_type": "markdown",
"id": "cfb7929a",
"metadata": {},
"source": [
"## Manually Create a DataFrame"
]
},
{
"cell_type": "markdown",
"id": "f3fe735d",
"metadata": {},
"source": [
"From a Dictionary\\\n",
"The columns order is the order of keys insertion:"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "16659cf2",
"metadata": {
"execution": {
"iopub.execute_input": "2025-09-02T18:59:30.678509Z",
"iopub.status.busy": "2025-09-02T18:59:30.678391Z",
"iopub.status.idle": "2025-09-02T18:59:30.682242Z",
"shell.execute_reply": "2025-09-02T18:59:30.681773Z"
},
"tags": [
"hide-output",
"scroll-output"
]
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Column 1 | \n",
" Column 2 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 100 | \n",
" 300 | \n",
"
\n",
" \n",
" 1 | \n",
" 200 | \n",
" 400 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Column 1 Column 2\n",
"0 100 300\n",
"1 200 400"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.DataFrame({'Column 1': [100,200], 'Column 2': [300,400]})\n",
"df"
]
},
{
"cell_type": "markdown",
"id": "7548cdfa",
"metadata": {},
"source": [
"From a list of random values w/ column names:"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "d096ed3b",
"metadata": {
"execution": {
"iopub.execute_input": "2025-09-02T18:59:30.683429Z",
"iopub.status.busy": "2025-09-02T18:59:30.683324Z",
"iopub.status.idle": "2025-09-02T18:59:30.688463Z",
"shell.execute_reply": "2025-09-02T18:59:30.688075Z"
},
"tags": [
"hide-output",
"scroll-output"
]
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
" c | \n",
" d | \n",
" e | \n",
" f | \n",
" g | \n",
" h | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.155808 | \n",
" 0.426032 | \n",
" 0.828060 | \n",
" 0.220790 | \n",
" 0.990389 | \n",
" 0.747130 | \n",
" 0.048883 | \n",
" 0.967204 | \n",
"
\n",
" \n",
" 1 | \n",
" 0.459662 | \n",
" 0.909124 | \n",
" 0.942143 | \n",
" 0.585818 | \n",
" 0.165209 | \n",
" 0.396012 | \n",
" 0.585996 | \n",
" 0.012020 | \n",
"
\n",
" \n",
" 2 | \n",
" 0.451281 | \n",
" 0.454347 | \n",
" 0.902485 | \n",
" 0.801921 | \n",
" 0.007451 | \n",
" 0.954774 | \n",
" 0.510324 | \n",
" 0.733164 | \n",
"
\n",
" \n",
" 3 | \n",
" 0.027769 | \n",
" 0.490229 | \n",
" 0.210495 | \n",
" 0.879985 | \n",
" 0.370400 | \n",
" 0.412179 | \n",
" 0.689901 | \n",
" 0.277350 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b c d e f g \\\n",
"0 0.155808 0.426032 0.828060 0.220790 0.990389 0.747130 0.048883 \n",
"1 0.459662 0.909124 0.942143 0.585818 0.165209 0.396012 0.585996 \n",
"2 0.451281 0.454347 0.902485 0.801921 0.007451 0.954774 0.510324 \n",
"3 0.027769 0.490229 0.210495 0.879985 0.370400 0.412179 0.689901 \n",
"\n",
" h \n",
"0 0.967204 \n",
"1 0.012020 \n",
"2 0.733164 \n",
"3 0.277350 "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.DataFrame(np.random.rand(4, 8), columns=list('abcdefgh'))"
]
},
{
"cell_type": "markdown",
"id": "4fe398e7",
"metadata": {},
"source": [
"From a dictionary including Series:"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "6aff393b",
"metadata": {
"execution": {
"iopub.execute_input": "2025-09-02T18:59:30.689498Z",
"iopub.status.busy": "2025-09-02T18:59:30.689395Z",
"iopub.status.idle": "2025-09-02T18:59:30.693945Z",
"shell.execute_reply": "2025-09-02T18:59:30.693559Z"
},
"tags": [
"hide-output",
"scroll-output"
]
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" col1 | \n",
" col2 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0 | \n",
" NaN | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" NaN | \n",
"
\n",
" \n",
" 2 | \n",
" 2 | \n",
" 2.0 | \n",
"
\n",
" \n",
" 3 | \n",
" 3 | \n",
" 3.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" col1 col2\n",
"0 0 NaN\n",
"1 1 NaN\n",
"2 2 2.0\n",
"3 3 3.0"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.DataFrame({'col1': [0,1,2,3], 'col2': pd.Series([2,3], index=[2,3])}, index=[0,1,2,3])"
]
},
{
"cell_type": "markdown",
"id": "571d74e9",
"metadata": {},
"source": [
"From numpy ndarray:"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "d5a4f1cb",
"metadata": {
"execution": {
"iopub.execute_input": "2025-09-02T18:59:30.695111Z",
"iopub.status.busy": "2025-09-02T18:59:30.695001Z",
"iopub.status.idle": "2025-09-02T18:59:30.698608Z",
"shell.execute_reply": "2025-09-02T18:59:30.698199Z"
},
"tags": [
"hide-output",
"scroll-output"
]
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
" c | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 2 | \n",
" 3 | \n",
"
\n",
" \n",
" 1 | \n",
" 4 | \n",
" 5 | \n",
" 6 | \n",
"
\n",
" \n",
" 2 | \n",
" 7 | \n",
" 8 | \n",
" 9 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b c\n",
"0 1 2 3\n",
"1 4 5 6\n",
"2 7 8 9"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),\n",
" columns=['a', 'b', 'c'])\n",
"df"
]
},
{
"cell_type": "markdown",
"id": "69f53fc4",
"metadata": {},
"source": [
"From a numpy ndarray that has labeled columns:"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "ca2cdf3d",
"metadata": {
"execution": {
"iopub.execute_input": "2025-09-02T18:59:30.699746Z",
"iopub.status.busy": "2025-09-02T18:59:30.699641Z",
"iopub.status.idle": "2025-09-02T18:59:30.703576Z",
"shell.execute_reply": "2025-09-02T18:59:30.703168Z"
},
"tags": [
"hide-output",
"scroll-output"
]
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" c | \n",
" a | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 3 | \n",
" 1 | \n",
"
\n",
" \n",
" 1 | \n",
" 6 | \n",
" 4 | \n",
"
\n",
" \n",
" 2 | \n",
" 9 | \n",
" 7 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" c a\n",
"0 3 1\n",
"1 6 4\n",
"2 9 7"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"d = np.array([(1,2,3), (4,5,6), (7,8,9)], dtype=[(\"a\", \"i4\"), (\"b\", \"i4\"), (\"c\", \"i4\")])\n",
"df = pd.DataFrame(data=d, columns=['c', 'a'])\n",
"df"
]
},
{
"cell_type": "markdown",
"id": "654b5879",
"metadata": {},
"source": [
"From Series/DataFrame:"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "e7d10002",
"metadata": {
"execution": {
"iopub.execute_input": "2025-09-02T18:59:30.704762Z",
"iopub.status.busy": "2025-09-02T18:59:30.704652Z",
"iopub.status.idle": "2025-09-02T18:59:30.708449Z",
"shell.execute_reply": "2025-09-02T18:59:30.708072Z"
},
"tags": [
"hide-output",
"scroll-output"
]
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" hehe | \n",
"
\n",
" \n",
" \n",
" \n",
" c | \n",
" 3 | \n",
"
\n",
" \n",
" a | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" hehe\n",
"c 3\n",
"a 1"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ser = pd.Series([1,2,3], index=['a','b','c'])\n",
"df = pd.DataFrame(data=ser, index=['c', 'a'], columns=['hehe'])\n",
"df"
]
},
{
"cell_type": "markdown",
"id": "e125321a",
"metadata": {},
"source": [
"If we construct from DataFrame, then the columns in the new DataFrame must be a subset of the original columns. If not, the new columns will be filled with NaN."
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "87bc4465",
"metadata": {
"execution": {
"iopub.execute_input": "2025-09-02T18:59:30.709768Z",
"iopub.status.busy": "2025-09-02T18:59:30.709658Z",
"iopub.status.idle": "2025-09-02T18:59:30.713634Z",
"shell.execute_reply": "2025-09-02T18:59:30.713248Z"
},
"tags": [
"hide-output",
"scroll-output"
]
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" x\n",
"c 3\n",
"a 1 \n",
" z\n",
"c NaN\n",
"a NaN\n"
]
}
],
"source": [
"df1 = pd.DataFrame([1,2,3], index=['a','b','c'], columns=['x'])\n",
"df2 = pd.DataFrame(data=df1, index=['c', 'a'])\n",
"df3 = pd.DataFrame(data=df1, index=['c', 'a'], columns=['z'])\n",
"print(df2, '\\n',df3)"
]
}
],
"metadata": {
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}