# Patch for lab3/Lab3Block1_2021_SVMs_St.R (git unified diff, two hunks).
# What the visible hunks do:
#   - re-space the existing statements (spaces around operators and after
#     commas) and put each ksvm() argument on its own line;
#   - rename the local test_point to data_point in the manual decision-value
#     loop and move the "We produce predictions ..." comment onto its own line;
#   - add the comment "Only first correct, close to decision boundary ...";
#   - re-add the final predict() line with identical text but without a
#     trailing newline (see the "\ No newline at end of file" marker).
# NOTE(review): gaussian_kernel() computes exp(-sum((x_i - x_star)^2) / (2 * sigma^2)),
#   while every model is fitted with kernel = "rbfdot", kpar = list(sigma = 0.05).
#   kernlab appears to define rbfdot as exp(-sigma * ||x - x'||^2); if so, passing
#   sigma = 0.05 to both yields different kernels, which would explain the
#   mismatch reported by the added comment. Confirm against the kernlab docs.
# NOTE(review): filter3 is fitted on all of spam and then scored on te, which is
#   a row subset of spam; verify err3 is not meant as a held-out error estimate.
diff --git a/lab3/Lab3Block1_2021_SVMs_St.R b/lab3/Lab3Block1_2021_SVMs_St.R index a14d097fa634c679573811f7788a6378a2895082..53af0feef20a2d2dc77c11ab62cd50bd3b1ba088 100644 --- a/lab3/Lab3Block1_2021_SVMs_St.R +++ b/lab3/Lab3Block1_2021_SVMs_St.R @@ -7,43 +7,78 @@ set.seed(1234567890) data(spam) foo <- sample(nrow(spam)) -spam <- spam[foo,] +spam <- spam[foo, ] tr <- spam[1:3000, ] va <- spam[3001:3800, ] trva <- spam[1:3800, ] -te <- spam[3801:4601, ] +te <- spam[3801:4601, ] by <- 0.3 err_va <- NULL -for(i in seq(by,5,by)){ - filter <- ksvm(type~.,data=tr,kernel="rbfdot",kpar=list(sigma=0.05),C=i,scaled=FALSE) - mailtype <- predict(filter,va[,-58]) - t <- table(mailtype,va[,58]) - err_va <-c(err_va,(t[1,2]+t[2,1])/sum(t)) +for (i in seq(by, 5, by)) { + filter <- ksvm( + type ~ ., + data = tr, + kernel = "rbfdot", + kpar = list(sigma = 0.05), + C = i, + scaled = FALSE + ) + mailtype <- predict(filter, va[, -58]) + t <- table(mailtype, va[, 58]) + err_va <- c(err_va, (t[1, 2] + t[2, 1]) / sum(t)) } -filter0 <- ksvm(type~.,data=tr,kernel="rbfdot",kpar=list(sigma=0.05),C=which.min(err_va)*by,scaled=FALSE) -mailtype <- predict(filter0,va[,-58]) -t <- table(mailtype,va[,58]) -err0 <- (t[1,2]+t[2,1])/sum(t) +filter0 <- ksvm( + type ~ ., + data = tr, + kernel = "rbfdot", + kpar = list(sigma = 0.05), + C = which.min(err_va) * by, + scaled = FALSE +) +mailtype <- predict(filter0, va[, -58]) +t <- table(mailtype, va[, 58]) +err0 <- (t[1, 2] + t[2, 1]) / sum(t) err0 -filter1 <- ksvm(type~.,data=tr,kernel="rbfdot",kpar=list(sigma=0.05),C=which.min(err_va)*by,scaled=FALSE) -mailtype <- predict(filter1,te[,-58]) -t <- table(mailtype,te[,58]) -err1 <- (t[1,2]+t[2,1])/sum(t) +filter1 <- ksvm( + type ~ ., + data = tr, + kernel = "rbfdot", + kpar = list(sigma = 0.05), + C = which.min(err_va) * by, + scaled = FALSE +) +mailtype <- predict(filter1, te[, -58]) +t <- table(mailtype, te[, 58]) +err1 <- (t[1, 2] + t[2, 1]) / sum(t) err1 -filter2 <- 
ksvm(type~.,data=trva,kernel="rbfdot",kpar=list(sigma=0.05),C=which.min(err_va)*by,scaled=FALSE) -mailtype <- predict(filter2,te[,-58]) -t <- table(mailtype,te[,58]) -err2 <- (t[1,2]+t[2,1])/sum(t) +filter2 <- ksvm( + type ~ ., + data = trva, + kernel = "rbfdot", + kpar = list(sigma = 0.05), + C = which.min(err_va) * by, + scaled = FALSE +) +mailtype <- predict(filter2, te[, -58]) +t <- table(mailtype, te[, 58]) +err2 <- (t[1, 2] + t[2, 1]) / sum(t) err2 -filter3 <- ksvm(type~.,data=spam,kernel="rbfdot",kpar=list(sigma=0.05),C=which.min(err_va)*by,scaled=FALSE) -mailtype <- predict(filter3,te[,-58]) -t <- table(mailtype,te[,58]) -err3 <- (t[1,2]+t[2,1])/sum(t) +filter3 <- ksvm( + type ~ ., + data = spam, + kernel = "rbfdot", + kpar = list(sigma = 0.05), + C = which.min(err_va) * by, + scaled = FALSE +) +mailtype <- predict(filter3, te[, -58]) +t <- table(mailtype, te[, 58]) +err3 <- (t[1, 2] + t[2, 1]) / sum(t) err3 # Questions @@ -58,25 +93,30 @@ gaussian_kernel <- function(x_i, x_star, sigma) { return(exp(-sum((x_i - x_star)^2) / (2 * sigma^2))) } -sv<-alphaindex(filter3)[[1]] -co<-coef(filter3)[[1]] -inte<- - b(filter3) -k<-NULL -for(i in 1:10){ # We produce predictions for just the first 10 points in the dataset. - k2<-0 - test_point <- spam[i, -58] - for(j in 1:length(sv)){ - - support_vector <- spam[sv[j], -58] - - kernel_value <- gaussian_kernel(support_vector, test_point, sigma = 0.05) +sv <- alphaindex(filter3)[[1]] +co <- coef(filter3)[[1]] +inte <- -b(filter3) +k <- NULL +for (i in 1:10) { + # We produce predictions for just the first 10 points in the dataset. + + k2 <- 0 + data_point <- spam[i, -58] + + for (j in 1:length(sv)) { + support_vector <- spam[sv[j], -58] + kernel_value <- gaussian_kernel(support_vector, data_point, sigma = 0.05) k2 <- k2 + co[j] * kernel_value } + k2 <- k2 + inte + print(k2) k <- c(k, sign(k2)) } + +# Only first correct, close to decision boundary (0.006292512). 
k -predict(filter3,spam[1:10,-58], type = "decision") +predict(filter3,spam[1:10,-58], type = "decision") \ No newline at end of file