import pandas as pd
from sklearn.datasets import load_iris
# Load the Iris dataset once and keep the raw loader result under its own
# name, so `iris` only ever refers to the DataFrame.
iris_bunch = load_iris()
# One column per feature, named after the dataset's feature names.
iris = pd.DataFrame(iris_bunch.data, columns=iris_bunch.feature_names)
# Attach the class labels from the same loaded object (no second load).
iris["Class"] = iris_bunch.target
iris
| | sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) | Class |
|---|---|---|---|---|---|
| 0 | 5.1 | 3.5 | 1.4 | 0.2 | 0 |
| 1 | 4.9 | 3.0 | 1.4 | 0.2 | 0 |
| 2 | 4.7 | 3.2 | 1.3 | 0.2 | 0 |
| 3 | 4.6 | 3.1 | 1.5 | 0.2 | 0 |
| 4 | 5.0 | 3.6 | 1.4 | 0.2 | 0 |
| ... | ... | ... | ... | ... | ... |
| 145 | 6.7 | 3.0 | 5.2 | 2.3 | 2 |
| 146 | 6.3 | 2.5 | 5.0 | 1.9 | 2 |
| 147 | 6.5 | 3.0 | 5.2 | 2.0 | 2 |
| 148 | 6.2 | 3.4 | 5.4 | 2.3 | 2 |
| 149 | 5.9 | 3.0 | 5.1 | 1.8 | 2 |
150 rows × 5 columns
Class 단위로 카운트 값을 새로운 컬럼에 추가하기
Class에 조건을 건다 -> 새로운 컬럼에 값을 넣는다?
# Boolean-mask selection: keep only the rows whose Class equals 0.
iris.loc[iris["Class"] == 0]
| | sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) | Class |
|---|---|---|---|---|---|
| 0 | 5.1 | 3.5 | 1.4 | 0.2 | 0 |
| 1 | 4.9 | 3.0 | 1.4 | 0.2 | 0 |
| 2 | 4.7 | 3.2 | 1.3 | 0.2 | 0 |
| 3 | 4.6 | 3.1 | 1.5 | 0.2 | 0 |
| 4 | 5.0 | 3.6 | 1.4 | 0.2 | 0 |
| 5 | 5.4 | 3.9 | 1.7 | 0.4 | 0 |
| 6 | 4.6 | 3.4 | 1.4 | 0.3 | 0 |
| 7 | 5.0 | 3.4 | 1.5 | 0.2 | 0 |
| 8 | 4.4 | 2.9 | 1.4 | 0.2 | 0 |
| 9 | 4.9 | 3.1 | 1.5 | 0.1 | 0 |
| 10 | 5.4 | 3.7 | 1.5 | 0.2 | 0 |
| 11 | 4.8 | 3.4 | 1.6 | 0.2 | 0 |
| 12 | 4.8 | 3.0 | 1.4 | 0.1 | 0 |
| 13 | 4.3 | 3.0 | 1.1 | 0.1 | 0 |
| 14 | 5.8 | 4.0 | 1.2 | 0.2 | 0 |
| 15 | 5.7 | 4.4 | 1.5 | 0.4 | 0 |
| 16 | 5.4 | 3.9 | 1.3 | 0.4 | 0 |
| 17 | 5.1 | 3.5 | 1.4 | 0.3 | 0 |
| 18 | 5.7 | 3.8 | 1.7 | 0.3 | 0 |
| 19 | 5.1 | 3.8 | 1.5 | 0.3 | 0 |
| 20 | 5.4 | 3.4 | 1.7 | 0.2 | 0 |
| 21 | 5.1 | 3.7 | 1.5 | 0.4 | 0 |
| 22 | 4.6 | 3.6 | 1.0 | 0.2 | 0 |
| 23 | 5.1 | 3.3 | 1.7 | 0.5 | 0 |
| 24 | 4.8 | 3.4 | 1.9 | 0.2 | 0 |
| 25 | 5.0 | 3.0 | 1.6 | 0.2 | 0 |
| 26 | 5.0 | 3.4 | 1.6 | 0.4 | 0 |
| 27 | 5.2 | 3.5 | 1.5 | 0.2 | 0 |
| 28 | 5.2 | 3.4 | 1.4 | 0.2 | 0 |
| 29 | 4.7 | 3.2 | 1.6 | 0.2 | 0 |
| 30 | 4.8 | 3.1 | 1.6 | 0.2 | 0 |
| 31 | 5.4 | 3.4 | 1.5 | 0.4 | 0 |
| 32 | 5.2 | 4.1 | 1.5 | 0.1 | 0 |
| 33 | 5.5 | 4.2 | 1.4 | 0.2 | 0 |
| 34 | 4.9 | 3.1 | 1.5 | 0.2 | 0 |
| 35 | 5.0 | 3.2 | 1.2 | 0.2 | 0 |
| 36 | 5.5 | 3.5 | 1.3 | 0.2 | 0 |
| 37 | 4.9 | 3.6 | 1.4 | 0.1 | 0 |
| 38 | 4.4 | 3.0 | 1.3 | 0.2 | 0 |
| 39 | 5.1 | 3.4 | 1.5 | 0.2 | 0 |
| 40 | 5.0 | 3.5 | 1.3 | 0.3 | 0 |
| 41 | 4.5 | 2.3 | 1.3 | 0.3 | 0 |
| 42 | 4.4 | 3.2 | 1.3 | 0.2 | 0 |
| 43 | 5.0 | 3.5 | 1.6 | 0.6 | 0 |
| 44 | 5.1 | 3.8 | 1.9 | 0.4 | 0 |
| 45 | 4.8 | 3.0 | 1.4 | 0.3 | 0 |
| 46 | 5.1 | 3.8 | 1.6 | 0.2 | 0 |
| 47 | 4.6 | 3.2 | 1.4 | 0.2 | 0 |
| 48 | 5.3 | 3.7 | 1.5 | 0.2 | 0 |
| 49 | 5.0 | 3.3 | 1.4 | 0.2 | 0 |
새로운 컬럼에 숫자 값을 넣어 보자
# Add a new "idx" column; the scalar 0 is assigned to every row.
iris["idx"]=0
# Preview the first five rows to confirm the new column is present.
iris.head()
| | sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) | Class | idx |
|---|---|---|---|---|---|---|
| 0 | 5.1 | 3.5 | 1.4 | 0.2 | 0 | 0 |
| 1 | 4.9 | 3.0 | 1.4 | 0.2 | 0 | 0 |
| 2 | 4.7 | 3.2 | 1.3 | 0.2 | 0 | 0 |
| 3 | 4.6 | 3.1 | 1.5 | 0.2 | 0 | 0 |
| 4 | 5.0 | 3.6 | 1.4 | 0.2 | 0 | 0 |
# Set idx to 1 for the Class == 0 rows with a single .loc call, which selects
# rows and column together and writes into `iris` itself.
# The earlier chained form, iris[mask]["idx"] = 1, assigned into a temporary
# copy of the filtered rows: `iris` stayed unchanged and pandas raised
# SettingWithCopyWarning. The outputs recorded below this cell were produced
# by that chained form.
iris.loc[iris["Class"] == 0, "idx"] = 1
C:\Users\jun\AppData\Local\Temp\ipykernel_12876\2747504533.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy iris[iris['Class']==0]["idx"]=1
# Take an explicit, independent copy of the Class == 0 rows so that it can
# be modified on its own.
iris_cpy = iris.loc[iris["Class"] == 0].copy()
iris_cpy.head()
| | sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) | Class | idx |
|---|---|---|---|---|---|---|
| 0 | 5.1 | 3.5 | 1.4 | 0.2 | 0 | 0 |
| 1 | 4.9 | 3.0 | 1.4 | 0.2 | 0 | 0 |
| 2 | 4.7 | 3.2 | 1.3 | 0.2 | 0 | 0 |
| 3 | 4.6 | 3.1 | 1.5 | 0.2 | 0 | 0 |
| 4 | 5.0 | 3.6 | 1.4 | 0.2 | 0 | 0 |
# Set idx to 1 on the copy, then write the copied rows back over the
# matching (Class == 0) rows of the original frame.
iris_cpy["idx"] = 1
iris.loc[iris["Class"] == 0] = iris_cpy
iris.head()
| | sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) | Class | idx |
|---|---|---|---|---|---|---|
| 0 | 5.1 | 3.5 | 1.4 | 0.2 | 0 | 1 |
| 1 | 4.9 | 3.0 | 1.4 | 0.2 | 0 | 1 |
| 2 | 4.7 | 3.2 | 1.3 | 0.2 | 0 | 1 |
| 3 | 4.6 | 3.1 | 1.5 | 0.2 | 0 | 1 |
| 4 | 5.0 | 3.6 | 1.4 | 0.2 | 0 | 1 |
idx에 1이라는 값을 넣었습니다.
이번에는 정해진 상수가 아니라 index를 넣어 보겠습니다.
# Collect the distinct Class labels as an array.
iris_uni = pd.unique(iris["Class"])
iris_uni
array([0, 1, 2])
# Work on a copy so the original `iris` frame is left untouched.
df = iris.copy()
# Number the rows inside each Class 1, 2, 3, ... in their current order.
# groupby("Class").cumcount() gives each row's 0-based position within its
# group, so adding 1 yields the same 1-based sequence that the manual counter
# produced, without a module-level counter mutated from inside apply() and
# without the per-class slice / copy / assign-back loop.
df["idx"] = df.groupby("Class").cumcount() + 1
df
| | sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) | Class | idx |
|---|---|---|---|---|---|---|
| 0 | 5.1 | 3.5 | 1.4 | 0.2 | 0 | 1 |
| 1 | 4.9 | 3.0 | 1.4 | 0.2 | 0 | 2 |
| 2 | 4.7 | 3.2 | 1.3 | 0.2 | 0 | 3 |
| 3 | 4.6 | 3.1 | 1.5 | 0.2 | 0 | 4 |
| 4 | 5.0 | 3.6 | 1.4 | 0.2 | 0 | 5 |
| ... | ... | ... | ... | ... | ... | ... |
| 145 | 6.7 | 3.0 | 5.2 | 2.3 | 2 | 46 |
| 146 | 6.3 | 2.5 | 5.0 | 1.9 | 2 | 47 |
| 147 | 6.5 | 3.0 | 5.2 | 2.0 | 2 | 48 |
| 148 | 6.2 | 3.4 | 5.4 | 2.3 | 2 | 49 |
| 149 | 5.9 | 3.0 | 5.1 | 1.8 | 2 | 50 |
150 rows × 6 columns