Regressão Linear Múltipla em Python
Categories: Machine Learning
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Feb 9 17:09:56 2021
@author: rafaeldontalgoncalez
"""
######################################
# Importando as libraries
######################################
import pandas as pd
import sklearn.model_selection as ms
import sklearn.linear_model as lm
import matplotlib.pyplot as plt
import numpy as np
######################################
# Load the dataset
######################################
dataset = pd.read_csv("/Users/rafaeldontalgoncalez/Downloads/2020_Data_Professional_Salary_MultipleFeatures.csv")
# Drop every row that has at least one missing value so the model only sees
# complete observations.
dataset = dataset.dropna()
# Features are every column except the last; the target is the last column,
# taken as a plain NumPy array.
# NOTE(review): the `.iloc` indexers were missing from this listing (a bare
# `dataset.iloc` is not a DataFrame); they are restored here assuming the
# target is the last CSV column -- confirm against the dataset.
X = dataset.iloc[:, :-1]
y = dataset.iloc[:, -1].values
######################################
# One-hot encode the dummy variables
######################################
X_dummies = pd.get_dummies(data=X)
######################################
# Split the data into train and test sets (one fifth held out for testing)
######################################
X_train, X_test, y_train, y_test = ms.train_test_split(
    X_dummies,
    y,
    test_size=0.2,
    random_state=0,
)
######################################
# Train the model
######################################
regressor = lm.LinearRegression().fit(X_train, y_train)
######################################
# Prediction
######################################
y_pred = regressor.predict(X_test)
np.set_printoptions(precision=2)
# Two-column array: predictions on the left, true targets on the right.
result = np.column_stack((y_pred, y_test))
def undummify(df, prefix_sep="_"):
    """Collapse one-hot (dummy) columns back into one column per feature.

    Columns whose name contains ``prefix_sep`` are treated as dummy columns
    named ``<feature><prefix_sep><category>``. For each feature, the category
    of the column holding the row-wise maximum is taken as that row's value.
    Columns without the separator are passed through unchanged.

    Note that any column whose name contains ``prefix_sep`` is treated as a
    dummy column, including ordinary columns that merely have the separator
    in their name.

    Args:
        df: DataFrame with string column labels, e.g. the output of
            ``pd.get_dummies``.
        prefix_sep: Separator between the feature name and the category.

    Returns:
        A DataFrame with the same index as ``df`` and one column per original
        feature, in order of first appearance.
    """
    # Map each feature name (text before the first separator) to whether it
    # is spread over dummy columns and therefore needs collapsing.
    cols2collapse = {
        item.split(prefix_sep)[0]: (prefix_sep in item) for item in df.columns
    }
    series_list = []
    for col, needs_to_collapse in cols2collapse.items():
        if needs_to_collapse:
            # Match on the exact "<feature><sep>" prefix. A substring match
            # (df.filter(like=col)) would also pick up unrelated features
            # whose names merely contain `col`.
            prefix = col + prefix_sep
            dummy_cols = [name for name in df.columns if name.startswith(prefix)]
            undummified = (
                df[dummy_cols]
                .idxmax(axis=1)
                # Keep everything after the first separator so categories
                # that themselves contain the separator stay intact.
                .apply(lambda x: x.split(prefix_sep, maxsplit=1)[1])
                .rename(col)
            )
            series_list.append(undummified)
        else:
            series_list.append(df[col])
    if not series_list:
        # pd.concat raises on an empty list; a frame with no columns simply
        # maps to an empty frame over the same index.
        return pd.DataFrame(index=df.index)
    undummified_df = pd.concat(series_list, axis=1)
    return undummified_df
# Recover the original categorical columns of the test features and reset the
# index so rows line up positionally with the prediction table below.
X_reverse = undummify(X_test).reset_index(drop=True)
# `result` holds predictions in column 0 and true targets in column 1; naming
# the columns at construction keeps the default 0..n-1 index.
y_compare = pd.DataFrame(result, columns=['y_pred', 'y_test'])
# Side-by-side table: test features, predicted value, actual value.
# The list of frames was missing from this listing and is restored here.
resultado_final = pd.concat([X_reverse, y_compare], axis=1)
######################################
# Specific value
######################################
# NOTE(review): the hard-coded feature row originally passed to predict() is
# missing from this listing and cannot be recovered. To predict for a custom
# observation, pass a 2-D row whose columns match X_dummies (same order,
# dummy columns included). As a runnable stand-in, predict for the first row
# of the test set.
print(regressor.predict(X_test.iloc[[0]]))
Olá meu povo e minha pova. Esse é o código usado no vídeo Regressão Linear Múltipla.
O dataset que eu usei foi esse aqui:
2020_Data_Professional_Salary_MultipleFeatures
Qualquer dúvida, só mandar! Ciao!
Rafa