from scipy.io import arff
import numpy as np
import pandas as pd
# NOTE(review): `radius` is not referenced by any other code visible in this
# file -- confirm whether something outside this view still relies on it.
radius = 4 # search radius
def distance(point1, point2, n_dims=4):
    """Return the Euclidean distance between two points.

    Only the first ``n_dims`` coordinates of each point are compared; any
    further entries are ignored.

    Args:
        point1: Indexable collection of numbers with at least ``n_dims``
            entries.
        point2: Indexable collection of numbers with at least ``n_dims``
            entries.
        n_dims: Number of leading coordinates to compare. Defaults to 4,
            the value that was previously hard-coded.

    Returns:
        The square root of the summed squared coordinate differences.
    """
    squared_diffs = [(point1[i] - point2[i]) ** 2 for i in range(n_dims)]
    return np.sqrt(np.sum(squared_diffs))
# Read the nominal weather dataset; element 0 of loadarff's result is what
# gets turned into the DataFrame.
weather = arff.loadarff('../dataset/weather.nominal.arff')
# Shuffle every row (frac=1) so the split below differs from run to run.
df = pd.DataFrame(weather[0]).sample(frac=1)
attribute_columns = df.columns
# The first 10 shuffled rows are the training set; the remainder is held out.
train_data, test_data = df.iloc[:10], df.iloc[10:]
train_length, test_length = len(train_data), len(test_data)
def predict(x, data):
    """Classify one sample as b'yes' or b'no' with categorical naive Bayes.

    For each class the score is the class prior multiplied by the
    Laplace-smoothed likelihood of every attribute value in ``x``.

    Args:
        x: pandas Series mapping attribute name -> observed value. Every
            name in ``x.index`` must be a column of ``data``.
        data: Training DataFrame holding the attribute columns plus a
            'play' column whose labels are b'yes' / b'no'.

    Returns:
        b'yes' if the "yes" score is at least the "no" score (ties go to
        b'yes'), otherwise b'no'.

    Raises:
        ValueError: If ``data`` has no rows.
    """
    whole_length = data.shape[0]
    if whole_length == 0:
        raise ValueError('data must contain at least one training row')

    labels = data['play']
    yes_rows = data[labels == b'yes']
    no_rows = data[labels == b'no']

    # Class priors. Counting rows directly gives a prior of 0 for a class
    # that is absent from ``data`` instead of failing on a missing key.
    yes_p = yes_rows.shape[0] / whole_length
    no_p = no_rows.shape[0] / whole_length

    for attribute in x.index:
        value = x[attribute]
        yes_count = yes_rows[attribute].value_counts()
        no_count = no_rows[attribute].value_counts()

        # Look the two counts up independently: a value that is missing
        # from one class must not zero out its count in the other class.
        yes_attribute_count = yes_count.get(value, 0)
        no_attribute_count = no_count.get(value, 0)

        # Laplace (add-one) smoothing: the denominator grows by the number
        # of distinct values of the attribute so that the smoothed
        # likelihoods over those values sum to 1 within each class.
        n_values = data[attribute].nunique()
        yes_p *= (yes_attribute_count + 1) / (yes_count.sum() + n_values)
        no_p *= (no_attribute_count + 1) / (no_count.sum() + n_values)

    return b'yes' if yes_p >= no_p else b'no'
# Evaluate on the held-out rows: column 4 is read as the true label and
# columns 0-3 are passed to predict() as the attributes.
y = test_data.iloc[:, 4]
y_predict_list = []
for i in range(test_length):
    x = test_data.iloc[i, 0:4]
    y_predict = predict(x, train_data)
    y_predict_list.append(y_predict)
# Compare array against array so the element-wise match does not depend on
# how a plain list is compared with a pandas Series.
matches = np.asarray(y_predict_list, dtype=object) == y.to_numpy()
print('test acc: %.2f' % np.mean(matches))
# Reposted from: https://blog.csdn.net/Zjhao666/article/details/101695567
# "View comments" -- leftover link text from the source web page.