import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
# Sample data creation: one feature (number of positive comments per client)
# and a binary target (does the client like the service?).
data = {
    'positive_comments': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
    'client_likes_service': [0, 0, 0, 1, 1, 1, 1, 1, 1, 1],  # 0 = No, 1 = Yes
}
df = pd.DataFrame(data)

# Data preprocessing: the double brackets keep X as a one-column DataFrame
# (the 2-D input the estimator expects); y is the target Series.
X = df[['positive_comments']]
y = df['client_likes_service']

# Splitting the dataset: 20% of the 10 rows (2 rows) are held out; the fixed
# random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Model training
model = LogisticRegression()
model.fit(X_train, y_train)

# Model evaluation.
# The test set has only two rows, so a class can easily be missing from
# y_test / y_pred. Pinning the label set keeps the report rows and the
# confusion-matrix shape (2x2) stable regardless of the split, and
# zero_division=0 reports 0 for undefined metrics instead of warning.
CLASS_LABELS = [0, 1]
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred, labels=CLASS_LABELS, zero_division=0))
print(confusion_matrix(y_test, y_pred, labels=CLASS_LABELS))

# Visualization: observed labels, the fitted probability curve, and the 0.5
# decision threshold that turns a probability into a 0/1 prediction.
plt.scatter(
    df['positive_comments'],
    df['client_likes_service'],
    color='blue',
    label='Observed clients',
)

# Evaluate the model on an evenly spaced grid spanning the observed feature
# range. The grid is a DataFrame with the training column name so the
# estimator sees the same feature name it was fitted with.
comment_grid = pd.DataFrame({
    'positive_comments': np.linspace(
        df['positive_comments'].min(), df['positive_comments'].max(), 200
    ),
})
# Column 1 of predict_proba is the probability of class 1 ("likes service"):
# classes_ is sorted and fit() requires both classes to be present.
likes_probability = model.predict_proba(comment_grid)[:, 1]
plt.plot(
    comment_grid['positive_comments'],
    likes_probability,
    color='green',
    label='Predicted P(likes service)',
)

plt.title('Positive Comments vs Client Satisfaction')
plt.xlabel('Number of Positive Comments')
plt.ylabel('Client Likes Service (0 = No, 1 = Yes)')
plt.axhline(0.5, color='red', linestyle='--', label='Decision threshold (0.5)')  # Threshold line
plt.legend()
plt.show()