%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Matlab script to perform cross validation on a model generated for
% numeric data from agriculture
%
% purpose: data mining with SVMs (support vector machines)
%
% requires:
%   - statistics toolbox for cvpartition (from Matlab2008a)
%   - readColData script (see link below)
%   - SVMTorch (compiled version, of course)
%   - data structure used for the results (cv_results)
% -----
% Georg Ru{\ss}
% russ@iws.cs.uni-magdeburg.de
% 2008-09-26
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Preparation steps, workspace

% clean workspace
clear all;

% remember the start time so the total run time can be reported at the end
clock_start = clock;

% seed random for reproducible results
rand('seed',1);
%pause;

% change paths to wherever your data is located and readable to matlab
% uses script readColData from
% http://web.cecs.pdx.edu/~gerry/MATLAB/plotting/loadingPlotData.html#colHeadings
[label,id_column,data]=readColData('sorted_all',10,10,1);

%% data specific stuff
% generate three data sets for the model to play with
% one:   N1, Yield2003, EM38                          -- target: Yield2004
% two:   N1, Yield2003, EM38, N2, REIP32              -- target: Yield2004
% three: N1, Yield2003, EM38, N2, REIP32, N3, REIP49  -- target: Yield2004
% works by eliminating the respective columns from the 'data' matrix above
% (columns are removed from right to left so the remaining indices stay valid)
set_1 = data;
set_1(:,7) = [];
set_1(:,5) = [];
set_1(:,4) = [];
set_1(:,3) = [];
set_1(:,2) = [];

set_2 = data;
set_2(:,7) = [];
set_2(:,5) = [];
set_2(:,3) = [];

% only set_3 is actually used here
set_3 = data;
set_3(:,7) = [];
Size_set_3 = size(set_3);

%% modeling stage
% struct to store actual, predicted and error values
cv_results = struct('actual',[], 'prediction', [], 'abserr', [], 'squerr', []);

k=10; % number of cross-validation folds

% generate data partition
% A 'kfold' partition yields k disjoint test folds. The previous 'Holdout'
% partition produced ONE fixed split, so every loop iteration trained and
% tested on exactly the same rows.
cvdata = cvpartition(Size_set_3(1,1),'kfold',k)

for i = 1:k
    % logical row masks for the i-th fold
    TrainSet = set_3(training(cvdata,i),:);
    TestSet = set_3(test(cvdata,i),:);

    % SVMTorch input files: first line holds "<rows> <cols>", data follows
    dlmwrite('svm-train',size(TrainSet),' ');
    dlmwrite('svm-train',TrainSet,'-append','delimiter',' ');
    dlmwrite('svm-test',size(TestSet),' ');
    dlmwrite('svm-test',TestSet,'-append','delimiter',' ');

    % run svmtorch as the model
    ! SVMTorch -rm svm-train svm-model
    ! SVMTest -oa svm-results svm-model svm-test
    ! rm svm-model

    % read in model output on test values
    res_read = dlmread('svm-results');

    % append values
    cv_results.prediction = vertcat(cv_results.prediction,res_read);
    % NOTE(review): column 8 is assumed to be the target (Yield2004) of
    % set_3 -- confirm against the column layout of 'sorted_all'
    cv_results.actual = vertcat(cv_results.actual,TestSet(:,8));

    % show progress (fold counter)
    i
end

%% generate error measures into cv_results
cv_results.abserr = abs(cv_results.actual - cv_results.prediction);
cv_results.squerr = (cv_results.abserr).^2;

mae = mean(cv_results.abserr)
rmse = sqrt(mean(cv_results.squerr))

% total wall-clock time of the script in seconds
elapsed_seconds = etime(clock, clock_start)