import pandas as pd
from sklearn.datasets import load_iris
# Load the Iris dataset once and reuse the same Bunch for both the feature
# matrix and the labels (previously load_iris() was called a second time just
# to fetch the targets, and `iris` was rebound from Bunch to DataFrame).
iris_bunch = load_iris()
iris = pd.DataFrame(iris_bunch.data, columns=iris_bunch.feature_names)
# "Class" holds the integer class label of each row.
iris["Class"] = iris_bunch.target
iris
sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) | Class | |
---|---|---|---|---|---|
0 | 5.1 | 3.5 | 1.4 | 0.2 | 0 |
1 | 4.9 | 3.0 | 1.4 | 0.2 | 0 |
2 | 4.7 | 3.2 | 1.3 | 0.2 | 0 |
3 | 4.6 | 3.1 | 1.5 | 0.2 | 0 |
4 | 5.0 | 3.6 | 1.4 | 0.2 | 0 |
... | ... | ... | ... | ... | ... |
145 | 6.7 | 3.0 | 5.2 | 2.3 | 2 |
146 | 6.3 | 2.5 | 5.0 | 1.9 | 2 |
147 | 6.5 | 3.0 | 5.2 | 2.0 | 2 |
148 | 6.2 | 3.4 | 5.4 | 2.3 | 2 |
149 | 5.9 | 3.0 | 5.1 | 1.8 | 2 |
150 rows × 5 columns
Class 단위로 카운트 값을 새로운 컬럼에 추가하기
Class에 조건을 건다 -> 새로운 컬럼에 값을 넣는다?
iris.loc[iris["Class"] == 0]  # rows whose Class is 0
sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) | Class | |
---|---|---|---|---|---|
0 | 5.1 | 3.5 | 1.4 | 0.2 | 0 |
1 | 4.9 | 3.0 | 1.4 | 0.2 | 0 |
2 | 4.7 | 3.2 | 1.3 | 0.2 | 0 |
3 | 4.6 | 3.1 | 1.5 | 0.2 | 0 |
4 | 5.0 | 3.6 | 1.4 | 0.2 | 0 |
5 | 5.4 | 3.9 | 1.7 | 0.4 | 0 |
6 | 4.6 | 3.4 | 1.4 | 0.3 | 0 |
7 | 5.0 | 3.4 | 1.5 | 0.2 | 0 |
8 | 4.4 | 2.9 | 1.4 | 0.2 | 0 |
9 | 4.9 | 3.1 | 1.5 | 0.1 | 0 |
10 | 5.4 | 3.7 | 1.5 | 0.2 | 0 |
11 | 4.8 | 3.4 | 1.6 | 0.2 | 0 |
12 | 4.8 | 3.0 | 1.4 | 0.1 | 0 |
13 | 4.3 | 3.0 | 1.1 | 0.1 | 0 |
14 | 5.8 | 4.0 | 1.2 | 0.2 | 0 |
15 | 5.7 | 4.4 | 1.5 | 0.4 | 0 |
16 | 5.4 | 3.9 | 1.3 | 0.4 | 0 |
17 | 5.1 | 3.5 | 1.4 | 0.3 | 0 |
18 | 5.7 | 3.8 | 1.7 | 0.3 | 0 |
19 | 5.1 | 3.8 | 1.5 | 0.3 | 0 |
20 | 5.4 | 3.4 | 1.7 | 0.2 | 0 |
21 | 5.1 | 3.7 | 1.5 | 0.4 | 0 |
22 | 4.6 | 3.6 | 1.0 | 0.2 | 0 |
23 | 5.1 | 3.3 | 1.7 | 0.5 | 0 |
24 | 4.8 | 3.4 | 1.9 | 0.2 | 0 |
25 | 5.0 | 3.0 | 1.6 | 0.2 | 0 |
26 | 5.0 | 3.4 | 1.6 | 0.4 | 0 |
27 | 5.2 | 3.5 | 1.5 | 0.2 | 0 |
28 | 5.2 | 3.4 | 1.4 | 0.2 | 0 |
29 | 4.7 | 3.2 | 1.6 | 0.2 | 0 |
30 | 4.8 | 3.1 | 1.6 | 0.2 | 0 |
31 | 5.4 | 3.4 | 1.5 | 0.4 | 0 |
32 | 5.2 | 4.1 | 1.5 | 0.1 | 0 |
33 | 5.5 | 4.2 | 1.4 | 0.2 | 0 |
34 | 4.9 | 3.1 | 1.5 | 0.2 | 0 |
35 | 5.0 | 3.2 | 1.2 | 0.2 | 0 |
36 | 5.5 | 3.5 | 1.3 | 0.2 | 0 |
37 | 4.9 | 3.6 | 1.4 | 0.1 | 0 |
38 | 4.4 | 3.0 | 1.3 | 0.2 | 0 |
39 | 5.1 | 3.4 | 1.5 | 0.2 | 0 |
40 | 5.0 | 3.5 | 1.3 | 0.3 | 0 |
41 | 4.5 | 2.3 | 1.3 | 0.3 | 0 |
42 | 4.4 | 3.2 | 1.3 | 0.2 | 0 |
43 | 5.0 | 3.5 | 1.6 | 0.6 | 0 |
44 | 5.1 | 3.8 | 1.9 | 0.4 | 0 |
45 | 4.8 | 3.0 | 1.4 | 0.3 | 0 |
46 | 5.1 | 3.8 | 1.6 | 0.2 | 0 |
47 | 4.6 | 3.2 | 1.4 | 0.2 | 0 |
48 | 5.3 | 3.7 | 1.5 | 0.2 | 0 |
49 | 5.0 | 3.3 | 1.4 | 0.2 | 0 |
새로운 컬럼에 숫자 값을 넣어보자
# Create the "idx" column and fill every row with the constant 0.
iris.loc[:, "idx"] = 0
iris.head(5)
sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) | Class | idx | |
---|---|---|---|---|---|---|
0 | 5.1 | 3.5 | 1.4 | 0.2 | 0 | 0 |
1 | 4.9 | 3.0 | 1.4 | 0.2 | 0 | 0 |
2 | 4.7 | 3.2 | 1.3 | 0.2 | 0 | 0 |
3 | 4.6 | 3.1 | 1.5 | 0.2 | 0 | 0 |
4 | 5.0 | 3.6 | 1.4 | 0.2 | 0 | 0 |
# Select rows and column in a single .loc call. The chained form
# iris[mask]["idx"] = 1 assigns to a temporary copy of the filtered rows,
# so `iris` itself is never updated (pandas raises SettingWithCopyWarning).
iris.loc[iris["Class"] == 0, "idx"] = 1
C:\Users\jun\AppData\Local\Temp\ipykernel_12876\2747504533.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy iris[iris['Class']==0]["idx"]=1
# Work on an explicit, independent copy of the Class == 0 rows so that later
# assignments to it are not made on a slice of `iris`.
iris_cpy = iris.loc[iris["Class"] == 0].copy()
iris_cpy.head(5)
sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) | Class | idx | |
---|---|---|---|---|---|---|
0 | 5.1 | 3.5 | 1.4 | 0.2 | 0 | 0 |
1 | 4.9 | 3.0 | 1.4 | 0.2 | 0 | 0 |
2 | 4.7 | 3.2 | 1.3 | 0.2 | 0 | 0 |
3 | 4.6 | 3.1 | 1.5 | 0.2 | 0 | 0 |
4 | 5.0 | 3.6 | 1.4 | 0.2 | 0 | 0 |
# Fill the column on the copy, then write the modified rows back into the
# matching (Class == 0) rows of the original frame.
iris_cpy["idx"] = 1
class0_mask = iris["Class"] == 0
iris[class0_mask] = iris_cpy
iris.head(5)
sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) | Class | idx | |
---|---|---|---|---|---|---|
0 | 5.1 | 3.5 | 1.4 | 0.2 | 0 | 1 |
1 | 4.9 | 3.0 | 1.4 | 0.2 | 0 | 1 |
2 | 4.7 | 3.2 | 1.3 | 0.2 | 0 | 1 |
3 | 4.6 | 3.1 | 1.5 | 0.2 | 0 | 1 |
4 | 5.0 | 3.6 | 1.4 | 0.2 | 0 | 1 |
idx에 1이라는 값을 넣었습니다.
이번에는 고정된 상수 대신, Class별로 1부터 시작하는 순번(index)을 넣어보겠습니다.
# Distinct class labels, in order of first appearance.
iris_uni = pd.unique(iris["Class"])
iris_uni
array([0, 1, 2])
# Number the rows within each Class, starting at 1, and store the result in
# the "idx" column of a copy of `iris` (the original frame is left untouched).
#
# groupby(...).cumcount() yields a 0-based running position inside each group
# in the existing row order, so adding 1 gives 1..n per class. This replaces
# the earlier approach of a global counter mutated from inside Series.apply
# and reset by hand for every class, which depended on call order and needed
# a copy / write-back round trip per class.
df = iris.copy()
df["idx"] = df.groupby("Class").cumcount() + 1
df
sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) | Class | idx | |
---|---|---|---|---|---|---|
0 | 5.1 | 3.5 | 1.4 | 0.2 | 0 | 1 |
1 | 4.9 | 3.0 | 1.4 | 0.2 | 0 | 2 |
2 | 4.7 | 3.2 | 1.3 | 0.2 | 0 | 3 |
3 | 4.6 | 3.1 | 1.5 | 0.2 | 0 | 4 |
4 | 5.0 | 3.6 | 1.4 | 0.2 | 0 | 5 |
... | ... | ... | ... | ... | ... | ... |
145 | 6.7 | 3.0 | 5.2 | 2.3 | 2 | 46 |
146 | 6.3 | 2.5 | 5.0 | 1.9 | 2 | 47 |
147 | 6.5 | 3.0 | 5.2 | 2.0 | 2 | 48 |
148 | 6.2 | 3.4 | 5.4 | 2.3 | 2 | 49 |
149 | 5.9 | 3.0 | 5.1 | 1.8 | 2 | 50 |
150 rows × 6 columns