%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Matlab script to perform cross validation on a model generated for
% numeric data from agriculture
%
% purpose: data mining with SVMs (support vector machines)
%
% requires:
%   - my splitCV script
%   - readColData script
%   - SVMTorch (compiled version, of course); the SVMTorch and SVMTest
%     executables must be callable from the shell MATLAB spawns
%
% files written to the current directory on every fold:
%   svm-train, svm-test, svm-model (removed again), svm-results
%
% workspace results:
%   cv_results - one row per test sample:
%                [actual, predicted, absolute error, squared error]
%   mae, rmse  - mean absolute error / root mean squared error over all folds
% -----
% Georg Ru{\ss}
% russ@iws.cs.uni-magdeburg.de
% 2008-09-24
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Preparation steps, workspace
% clean workspace
clear all;
% set clock (used at the end of the script to report the elapsed time)
clock_start = clock;
% seed random for reproducible results
% (legacy seeding syntax kept on purpose: switching generators would change
% any random numbers drawn later and therefore the results)
rand('seed',1);
%pause;

% change paths to wherever your data is located and readable to matlab
% uses script readColData from
% http://web.cecs.pdx.edu/~gerry/MATLAB/plotting/loadingPlotData.html#colHeadings
[label,id_column,data]=readColData('sorted_all',10,10,1);

%% data specific stuff
% generate three data sets for the model to play with
% one:   N1, Yield2003, EM38 -- target: Yield2004
% two:   N1, Yield2003, EM38, N2, REIP32 -- target: Yield2004
% three: N1, Yield2003, EM38, N2, REIP32, N3, REIP49 -- target: Yield2004
% works by eliminating the respective columns from the 'data' matrix above
% (only set_3 is used by the cross validation below)
set_1 = data;
set_1(:,[2 3 4 5 7]) = [];

set_2 = data;
set_2(:,[3 5 7]) = [];

set_3 = data;
set_3(:,7) = [];

%% k-fold cross validation
% number of folds
k = 10;
% column of set_3 holding the real target value
% NOTE(review): presumably this is the last column of set_3 (Yield2004) --
% confirm against the layout of 'sorted_all' before changing the data set.
target_col = 8;

% real values vs. values from the SVM regression model, collected per fold
% and concatenated once after the loop instead of growing a matrix
fold_results = cell(k,1);

for fold = 1:k
    [TrainSet TestSet] = splitCV(set_3, k, fold);

    % each data file starts with a "rows cols" header line followed by the
    % space-delimited samples
    % NOTE(review): dlmwrite is called with its default precision, so values
    % are presumably rounded to few significant digits on disk -- confirm
    % that this is acceptable for the data at hand.
    dlmwrite('svm-train',size(TrainSet),' ');
    dlmwrite('svm-train',TrainSet,'-append','delimiter',' ');
    dlmwrite('svm-test',size(TestSet),' ');
    dlmwrite('svm-test',TestSet,'-append','delimiter',' ');

    % remove predictions of a previous fold/run so that a failing tool can
    % never make us silently read stale results
    if exist('svm-results','file')
        delete('svm-results');
    end

    % train the model; abort if the external tool reports a failure
    status = system('SVMTorch -rm svm-train svm-model');
    if status ~= 0
        error('SVMTorch failed in fold %d (exit status %d)', fold, status);
    end

    % apply the model to the held-out fold
    status = system('SVMTest -oa svm-results svm-model svm-test');
    if status ~= 0
        error('SVMTest failed in fold %d (exit status %d)', fold, status);
    end

    % the model file is only needed for this fold
    delete('svm-model');

    res_read = dlmread('svm-results');
    fold_results{fold} = horzcat(TestSet(:,target_col), res_read);

    % progress indicator
    fprintf('fold %d of %d done\n', fold, k);
end

cv_results = vertcat(fold_results{:});

%% generate error measures into cv_results
rows = size(cv_results,1);
% column 3: absolute error, column 4: squared error
cv_results(:,3) = abs(cv_results(:,1) - cv_results(:,2));
cv_results(:,4) = cv_results(:,3).^2;

means = mean(cv_results);
% no semicolons below: the final measures are meant to be displayed
mae = means(1,3)
rmse = sqrt(means(1,4))

% report total run time in seconds
elapsed_seconds = etime(clock, clock_start)