1013
[8주차 - Day3] ML_basics - 실습
데이터 전처리
과제 시작
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
# Load the raw delivery data exactly once. The working frame in this notebook
# is the tab-delimited parse, so the earlier default-delimiter read (whose
# result was immediately overwritten) is folded into a single read.
dataset = pd.read_csv("./delivery_raw.csv", delimiter='\t')
# Work on a copy so `dataset` keeps the untouched raw rows.
df = dataset.copy()
df.head(5)
df
df.dtypes
# Derive the delivery duration (in seconds) from the two timestamp columns.
created_ts = pd.to_datetime(df['created_at'])
delivered_ts = pd.to_datetime(df['actual_delivery_time'])
df['created_at_time'] = created_ts
df['actual_delivery_time_time'] = delivered_ts
# Timedelta between order creation and actual delivery.
df['time'] = delivered_ts - created_ts
df['delivery_time'] = df['time'].dt.total_seconds()
df.dtypes
df['actual_delivery_time']
# Keep only the numeric duration: remove the raw timestamp strings and the
# intermediate datetime/timedelta helper columns.
helper_columns = [
    'time',
    'actual_delivery_time',
    'created_at',
    'created_at_time',
    'actual_delivery_time_time',
]
df = df.drop(labels=helper_columns, axis=1)
df
# Count missing values per column before cleaning.
df.isnull().sum()
# Drop every row that contains at least one missing value.
df_del = df.dropna()
df_del.isnull().sum()
df_del
# Renumber the remaining rows from 0. drop=True discards the old index
# directly instead of first materialising it as an 'index' column and then
# deleting that column again.
df_del = df_del.reset_index(drop=True)
df_del
# Inspect how many rows fall into each store category.
df_del['store_primary_category'].value_counts()
# Integer-encode the category column: each distinct value is replaced by its
# position in `category`, which lists the values in order of first appearance.
category = df_del['store_primary_category'].unique().tolist()
# A dict gives constant-time lookups; calling category.index() per row would
# rescan the whole category list for every record.
code_by_category = {name: code for code, name in enumerate(category)}
encoded = df_del['store_primary_category'].map(code_by_category)
# `df_category` is kept as the plain list of codes in case later cells use it.
df_category = encoded.tolist()
df_del['store_primary_category'] = encoded
df_del
# Pairwise correlation between the columns; computed once and reused for the
# plot instead of being recomputed inside the heatmap call.
corr_matrix = df_del.corr()
corr_matrix
plt.figure(figsize=(20,15))
# Draw the correlation matrix as an annotated heatmap for visual inspection.
sns.heatmap(data=corr_matrix, annot=True, fmt='.2f', linewidths=.5, cmap='Blues')