본문 바로가기

프로그래머스 데브 코스/TIL

[6기] 프로그래머스 인공지능 데브코스 43일차 TIL

1013

[8주차 - Day3] ML_basics - 실습

데이터 전처리

과제 시작
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

# Load the raw delivery data. The file is tab-separated, so it must be parsed
# with a tab delimiter; the default comma separator does not split the
# columns correctly. Parse it once and keep `dataset` as the untouched raw
# frame, while `df` is the working copy that later steps modify.
dataset = pd.read_csv("./delivery_raw.csv", delimiter='\t')
df = dataset.copy()
df.head(5)

df

# Column types before the timestamp conversion.
df.dtypes

# Convert both timestamp columns with pd.to_datetime, storing the results in
# helper columns next to the originals.
for source_col, parsed_col in (
    ('created_at', 'created_at_time'),
    ('actual_delivery_time', 'actual_delivery_time_time'),
):
    df[parsed_col] = pd.to_datetime(df[source_col])

# Elapsed time between order creation and delivery, kept as a helper column
# and then expressed in seconds as `delivery_time`.
df['time'] = df['actual_delivery_time_time'] - df['created_at_time']
df['delivery_time'] = df['time'].dt.total_seconds()
df.dtypes

df['actual_delivery_time']

# Only `delivery_time` is kept; the raw timestamps and helper columns go.
helper_columns = [
    'time', 'actual_delivery_time', 'created_at',
    'created_at_time', 'actual_delivery_time_time',
]
df = df.drop(columns=helper_columns)
df

# Number of missing values per column before cleaning.
df.isnull().sum()

# Drop every row that contains a missing value and renumber the remaining
# rows from 0. `drop=True` discards the old index directly, so no temporary
# 'index' column has to be created and removed afterwards.
df_del = df.dropna().reset_index(drop=True)
df_del.isnull().sum()

df_del

# Frequency of each store category before encoding.
df_del['store_primary_category'].value_counts()

# Integer-encode the category labels. pd.factorize assigns codes in order of
# first appearance, i.e. the position of each label in `.unique()`, in a
# single vectorized pass instead of a per-row `list.index` lookup.
category_codes, category_labels = pd.factorize(df_del['store_primary_category'])

# `category[code]` maps an encoded value back to its original label.
category = category_labels.tolist()
df_category = category_codes.tolist()

df_del['store_primary_category'] = category_codes
df_del

# Pairwise correlation between the columns of the cleaned frame. Computed
# once and reused for both the table and the plot.
corr_matrix = df_del.corr()
corr_matrix

plt.figure(figsize=(20, 15))
# Annotated heatmap to inspect the correlations visually.
sns.heatmap(data=corr_matrix, annot=True, fmt='.2f', linewidths=.5, cmap='Blues')