Skip to content
Snippets Groups Projects
Commit 87e4bbec authored by Andreas Nordberg's avatar Andreas Nordberg
Browse files

added more algorithms

parent 0d872da7
No related branches found
No related tags found
No related merge requests found
......@@ -2,8 +2,13 @@ from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn import metrics
from sklearn.svm import SVC, LinearSVC
from sklearn.linear_model import LogisticRegression, SGDClassifier, Perceptron, PassiveAggressiveClassifier
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
import json
import numpy
import warnings
......@@ -35,14 +40,21 @@ X = [' '.join(inner_list) for inner_list in
# Bag-of-words vectorizer for the space-joined ingredient strings in X.
vect = CountVectorizer()
# NOTE(review): two consecutive assignments to clf (this page appears to show
# both the old and the new diff line); if both execute, the later one wins.
clf = OneVsRestClassifier(SVC())
clf = RandomForestClassifier()
# Alternative classifiers that have been tried; uncomment one to switch.
# clf = DecisionTreeClassifier()
# clf = OneVsRestClassifier(PassiveAggressiveClassifier())
# clf = OneVsRestClassifier(Perceptron())
# clf = OneVsRestClassifier(LogisticRegression(multi_class='ovr'))
# clf = OneVsRestClassifier(GradientBoostingClassifier())
# clf = OneVsRestClassifier(SGDClassifier())
# clf = OneVsRestClassifier(SVC())
# clf = OneVsRestClassifier(LinearSVC()) # For larger data sets
mlb = MultiLabelBinarizer() # Creates a binary matrix with labels as columns, sorted by name. Samples as rows.
# NOTE! If an ingredient consists of several words, e.g. "färska örter", then "färska" becomes an ingredient of its own. Fix (maybe).
y_binary_matrix = mlb.fit_transform(Y) # columns (the labels) are in alphabetical order, as given by mlb.classes_
x_binary_matrix = vect.fit_transform(X) # columns are in alphabetical order, as given by the vectorizer's get_feature_names()
# NOTE(review): the split is performed twice with different test_size values
# (again, presumably the old and new diff lines); the second result is the one
# that is used below.
train_x, test_x, train_y, test_y = train_test_split(x_binary_matrix, y_binary_matrix, test_size=0.1)
train_x, test_x, train_y, test_y = train_test_split(x_binary_matrix, y_binary_matrix, test_size=0.15)
# Lower test size => larger training size => better predict metrics
# Train the selected classifier on the training split.
clf.fit(train_x, train_y)
......@@ -52,7 +64,7 @@ predictions = clf.predict(test_x)
# Vectorize three hand-written, space-separated ingredient strings with vect
# so they can be passed to the classifier.
customTestSample = vect.transform(['havregryn mjölk salt hallon', 'kycklingfärs ströbröd mjölk ägg persilja citron svartpeppar smör salt potatis', 'smör färskost ägg mandel hasselnöt råsocker vetemjöl'])
# Predict labels for the custom samples and map the binary rows back to label names.
print(mlb.inverse_transform(clf.predict(customTestSample)))
# Per-label classification report for the test split, with mlb.classes_ as row names.
# NOTE(review): result_metrics is assigned twice (presumably the old and new
# diff lines); the second call, which passes zero_division=True, is the one printed.
result_metrics = metrics.classification_report(test_y, predictions, target_names=mlb.classes_)
result_metrics = metrics.classification_report(test_y, predictions, target_names=mlb.classes_, zero_division=True)
print(result_metrics)
......@@ -74,3 +86,4 @@ print(result_metrics)
"Måltid|Fest" får dålig prediction för att det finns 4 recept med det, dock många fler med Tillfällen|fest
Veckans middagar får dålig prediction för att..?=?
"""
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment