import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Column names for the header-less data file; 'Class' is the prediction target
# and every other column is treated as a categorical feature.
COLUMN_NAMES = [
    'Class', 'age', 'menopause', 'tumor-size', 'inv-nodes',
    'node-caps', 'deg-malig', 'breast', 'breast-quad', 'irradiat',
]
TARGET_COLUMN = 'Class'
FEATURE_COLUMNS = [name for name in COLUMN_NAMES if name != TARGET_COLUMN]

# Mapping of the textual class labels to the binary target (1 = recurrence).
CLASS_LABELS = {'no-recurrence-events': 0, 'recurrence-events': 1}

data = pd.read_csv("UCI/breast-cancer.data", header=None, names=COLUMN_NAMES)

# Missing values are marked with '?' in the file. Those rows are NOT removed
# here, so '?' is one-hot encoded like any other category value. To drop them
# instead, filter out rows where 'node-caps' or 'breast-quad' equals '?'
# before encoding.

# Encode the target once. Labels missing from CLASS_LABELS map to NaN, so fail
# with an explicit message rather than a cryptic NaN-to-int cast error.
encoded_class = data[TARGET_COLUMN].map(CLASS_LABELS)
unmapped = encoded_class.isna()
if unmapped.any():
    unknown_labels = sorted(data.loc[unmapped, TARGET_COLUMN].astype(str).unique())
    raise ValueError(
        f"Unexpected values in '{TARGET_COLUMN}' column: {unknown_labels}"
    )
# Direct column assignment replaces the column (and its dtype); assigning via
# `.loc[:, col]` may write in place and leave the column as object dtype.
data[TARGET_COLUMN] = encoded_class.astype(int)

# One-hot encode every feature; drop_first=True drops one level per feature.
data_encoded = pd.get_dummies(data, columns=FEATURE_COLUMNS, drop_first=True)

X = data_encoded.drop(columns=[TARGET_COLUMN])
y = data_encoded[TARGET_COLUMN]

# Fixed random_state keeps the 80/20 split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

def _report_metrics(header, y_true, y_pred):
    """Print accuracy, precision, recall and F1-score for one set of predictions.

    Args:
        header: Line printed before the metrics.
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_true``.
    """
    print(header)
    print("Dokładność (accuracy):", accuracy_score(y_true, y_pred))
    print("Precyzja (precision):", precision_score(y_true, y_pred))
    print("Czułość (recall):", recall_score(y_true, y_pred))
    print("F1-score:", f1_score(y_true, y_pred))


# Pipeline with standardization: features pass through StandardScaler before
# reaching the classifier.
pipeline_with_scaling = Pipeline([
    ('scaler', StandardScaler()),
    ('classifier', LogisticRegression()),
])
pipeline_with_scaling.fit(X_train, y_train)
y_pred_scal = pipeline_with_scaling.predict(X_test)
_report_metrics("Metryki ze standaryzacją:", y_test, y_pred_scal)

# Pipeline without standardization: the classifier sees the raw encoded
# features, which gives the baseline for the comparison.
pipeline_no_scaling = Pipeline([
    ('classifier', LogisticRegression()),
])
pipeline_no_scaling.fit(X_train, y_train)
y_pred = pipeline_no_scaling.predict(X_test)
_report_metrics("\nMetryki bez standaryzacji:", y_test, y_pred)