%% function for running modeling on different input data sets
% to be used in conjunction with the icdm09 regression model
% comparison script
%
% see http://blog.georgruss.de/?p=145
% by russ@iws.cs.uni-magdeburg.de
% 2009-01-26
%
% MODELCOMPARISON(DATA, DATAPREFIX, KFOLD, LABELS)
%   Runs k repetitions of holdout cross-validation over DATA, trains four
%   regression models per repetition (SVMTorch via external binaries, an
%   MLP via newff/train, an RBF network via newrb, and a regression tree
%   via classregtree), collects MAE/RMSE per repetition, and writes
%   comparison plots and the workspace to disk.
%
%   Inputs:
%     data       - numeric matrix, one example per row; the LAST column is
%                  the regression target, the preceding columns are inputs
%                  (see TrainSet.examples / TrainSet.targets selection below)
%     dataprefix - string used as prefix for saved figure/workspace files
%     kfold      - number of holdout repetitions; each holdout test set has
%                  size 1/kfold of the data
%     labels     - attribute labels; after transposition, columns 1:7 are
%                  input names and column 8 is the target name
%                  % NOTE(review): column indices 7/8 are hard-coded here and
%                  % in the un-normalization below (PS.xstd(8)/PS.xmean(8)),
%                  % so this assumes exactly 7 inputs + 1 target — matches
%                  % the TODO about flexibility.
%
%   Side effects (no return value):
%     - writes/overwrites files under ~/.matlab (svm-train, svm-test,
%       svmtorch.sh, svm-results) and runs the external SVMTorch/SVMTest
%       binaries via shell escapes
%     - changes the working directory to ~/.matlab
%     - opens figures and saves them via savefig; saves the workspace
%       with save(dataprefix)
%
%   Requirements: Statistics Toolbox (cvpartition, classregtree),
%   Neural Network Toolbox (mapstd, newff, newrb, train, sim),
%   SVMTorch/SVMTest on the shell PATH, and a savefig helper on the path.

function modelcomparison(data,dataprefix,kfold,labels)

% TODO: change it to be more flexible to the number of labels
labels = transpose(labels);
% labels of input attributes
label_inputs = labels(:,1:7);
% target label (probably YIELD'year')
label_target = labels(:,8);
% no-op: result is suppressed by the semicolon, so nothing is echoed
dataprefix;

%% modeling stage
% partitioning parameters for cross validation
% (no semicolon: deliberately echoes k to the console as progress output)
k=kfold
% k-fold holdout cross validation; p is the holdout (test) fraction
p = 1/k;

% set_3 is just a legacy from older scripts
set_3 = data;
Size_set_3 = size(set_3);

% normalize data (requires double transposition of data matrix, because
% mapstd works column-wise per variable); PS stores the per-row mean/std
% settings needed later to map predictions back to original units
[set_3_norm, PS] = mapstd(transpose(set_3));
set_3_norm = transpose(set_3_norm);

% where to store the results (one MAE/RMSE entry per repetition, per model)
% NOTE(review): field 'j' is initialized but never written below
modelresults = struct('j',[],'mae_svm',[],'rmse_svm',[], 'mae_mlp',[], 'rmse_mlp', []);
modelresults.mae_rbf = [];
modelresults.rmse_rbf = [];
modelresults.mae_regtree = [];
modelresults.rmse_regtree = [];

% repetition indices collected for plotting on the x-axis later
j_res=[];

for j = 1:k
    % echo current repetition number (progress output, deliberately unsuppressed)
    j

    % generate data partition (cvpartition from matlab's statistics toolbox)
    % size of holdout (test) set: 1/k
    % (yields indices of data set; unsuppressed so the partition summary prints)
    % NOTE(review): each iteration draws an independent random holdout split,
    % so this is repeated holdout, not a disjoint k-fold partition
    cvdata = cvpartition(Size_set_3(1,1),'Holdout',p)

    % structured object to store actual, predicted and error values (for each
    % model); re-initialized every repetition, so after the loop these hold
    % only the LAST repetition's values (relevant for the plotmatrix section)
    svmresults = struct('actual',[], 'prediction', [], 'abserr', [], 'squerr',[]);
    mlpresults = struct('actual',[], 'prediction', [], 'abserr', [], 'squerr',[]);
    rbfresults = struct('actual',[], 'prediction', [], 'abserr', [], 'squerr',[]);
    regtreeresults = struct('actual',[], 'prediction', [], 'abserr', [], 'squerr',[]);

    j_res = vertcat(j_res,j);

    % normalized train/test split; examples = all columns but the last,
    % targets = last column
    TrainSet = struct();
    TrainSet.all = set_3_norm(cvdata.training,:);
    TrainSet.size = size(TrainSet.all);
    TrainSet.examples = TrainSet.all(:,1:TrainSet.size(1,2)-1);
    TrainSet.targets = TrainSet.all(:,TrainSet.size(1,2));
    TestSet = struct();
    TestSet.all = set_3_norm(cvdata.test,:);
    TestSet.size = size(TestSet.all);
    TestSet.examples = TestSet.all(:,1:TestSet.size(1,2)-1);
    TestSet.targets = TestSet.all(:,TestSet.size(1,2));

    % The regression tree requires (or should have) un-normalized
    % values for better understanding of the tree.
    % Hence, the same data set is used, but with
    % un-normalized values.
    TrainSet.all_unnorm = set_3(cvdata.training,:);
    TrainSet.examples_unnorm = TrainSet.all_unnorm(:,1:TrainSet.size(1,2)-1);
    TrainSet.targets_unnorm = TrainSet.all_unnorm(:,TrainSet.size(1,2));
    TestSet.all_unnorm = set_3(cvdata.test,:);
    % NOTE(review): redundant — TestSet.size was already set identically above
    TestSet.size = size(TestSet.all);
    TestSet.examples_unnorm = TestSet.all_unnorm(:,1:TestSet.size(1,2)-1);
    TestSet.targets_unnorm = TestSet.all_unnorm(:,TestSet.size(1,2));

    % --------------------------------------
    % begin SVMTorch stuff
    % write training data to file: first line is the size header SVMTorch
    % expects, then the data matrix is appended
    dlmwrite('~/.matlab/svm-train',TrainSet.size,' ');
    dlmwrite('~/.matlab/svm-train',TrainSet.all,'-append','delimiter',' ');
    % write test data to file
    dlmwrite('~/.matlab/svm-test',TestSet.size,' ');
    dlmwrite('~/.matlab/svm-test',TestSet.all,'-append','delimiter',' ');

    % run svmtorch as the model
    % requires writing a script file first ...
    % (regression mode -rm, RBF kernel -t 2, eps-tube 0.2, kernel std 4, C=60)
    fid = fopen('~/.matlab/svmtorch.sh','w');
    fprintf(fid,'%s\n%s%u%s\n','#! /bin/bash','SVMTorch -rm -t 2 -eps 0.2 -std 4 -c ',60,' ~/.matlab/svm-train ~/.matlab/svm-model');
    fclose(fid);
    % ... and making it executable ...
    ! chmod 700 ~/.matlab/svmtorch.sh
    % ... before running it:
    % generate model
    ! ~/.matlab/svmtorch.sh
    % test model and store results in file 'svm-results'
    ! SVMTest -oa ~/.matlab/svm-results ~/.matlab/svm-model ~/.matlab/svm-test
    % remove model
    ! rm ~/.matlab/svm-model

    % read in model output on test values (from file)
    svmres_read_norm = dlmread('~/.matlab/svm-results');
    % un-normalize the SVM results and TestSet.targets (see help mapstd for
    % calculation); row 8 of PS holds the target variable's mean/std
    svmres_read = (svmres_read_norm - PS.ymean) * PS.xstd(8)/PS.ystd + PS.xmean(8);
    test_targets = (TestSet.targets - PS.ymean) * PS.xstd(8)/PS.ystd + PS.xmean(8);
    % append values
    svmresults.prediction = vertcat(svmresults.prediction,svmres_read);
    %svmresults.actual = vertcat(svmresults.actual,TestSet.targets);
    svmresults.actual = vertcat(svmresults.actual,test_targets);
    % end SVMTorch stuff
    % ----------------------------------------

    % ----------------------------------------
    % begin MLP stuff
    % one hidden layer with 10 neurons; newff/sim work column-wise per
    % example, hence the transpositions around every call
    mlp = newff(transpose(TrainSet.examples),transpose(TrainSet.targets),[10]);
    mlp.trainParam.min_grad = 0.001;
    mlp.trainParam.epochs = 10000;
    mlp.trainParam.lr = 0.25;
    mlp.trainParam.showWindow = false;
    mlp.trainParam.showCommandLine = true;
    mlp.trainParam.show = 50;
    [mlptrained, mlptrainrecord] = train(mlp,transpose(TrainSet.examples),transpose(TrainSet.targets));
    mlpresults.prediction = transpose(sim(mlptrained,transpose(TestSet.examples)));
    mlpresults.actual = TestSet.targets;
    % un-normalize mlp results to original ranges
    mlpresults.prediction = (mlpresults.prediction - PS.ymean) * PS.xstd(8)/PS.ystd + PS.xmean(8);
    mlpresults.actual = (mlpresults.actual - PS.ymean) * PS.xstd(8)/PS.ystd + PS.xmean(8);
    % end MLP stuff
    % ----------------------------------------

    % ----------------------------------------
    % begin RBF stuff
    % newrb grows the network up to max_neurons, adding df neurons between
    % progress displays; target_error 0 means it always grows to the cap
    target_error = 0;
    spread = 1;
    max_neurons = 70;
    df = 100;
    [rbf, tr] = newrb(transpose(TrainSet.examples),transpose(TrainSet.targets), target_error, spread, max_neurons, df);
    rbfresults.prediction = transpose(sim(rbf,transpose(TestSet.examples)));
    rbfresults.actual = TestSet.targets;
    % un-normalize rbf results to original ranges
    rbfresults.prediction = (rbfresults.prediction - PS.ymean) * PS.xstd(8)/PS.ystd + PS.xmean(8);
    rbfresults.actual = (rbfresults.actual - PS.ymean) * PS.xstd(8)/PS.ystd + PS.xmean(8);
    % end RBF stuff
    % ----------------------------------------

    % ----------------------------------------
    % begin regression tree stuff
    % construct tree on the UN-normalized data so split thresholds are
    % readable in original units; label_inputs names the predictors
    tree = classregtree(TrainSet.examples_unnorm, TrainSet.targets_unnorm, 'names', label_inputs); %,y,param1,val1,param2,val2)
    % calculate prediction (classregtree's eval method, not the builtin eval)
    regtreeresults.prediction = eval(tree,TestSet.examples_unnorm);
    regtreeresults.actual = TestSet.targets_unnorm;
    % end regression tree stuff
    % ----------------------------------------

    % ----------------------------------------
    % collect error values
    % generate error measures and store them inside the respective results struct
    svmresults.abserr = abs(svmresults.actual - svmresults.prediction);
    svmresults.squerr = (svmresults.abserr).^2;
    mlpresults.abserr = abs(mlpresults.actual - mlpresults.prediction);
    mlpresults.squerr = (mlpresults.abserr).^2;
    rbfresults.abserr = abs(rbfresults.actual - rbfresults.prediction);
    rbfresults.squerr = (rbfresults.abserr).^2;
    regtreeresults.abserr = abs(regtreeresults.actual - regtreeresults.prediction);
    regtreeresults.squerr = (regtreeresults.abserr).^2;

    % calculate mae and rmse for this repetition
    svmmae = mean(svmresults.abserr);
    svmrmse = sqrt(mean(svmresults.squerr));
    mlpmae = mean(mlpresults.abserr);
    mlprmse = sqrt(mean(mlpresults.squerr));
    rbfmae = mean(rbfresults.abserr);
    rbfrmse = sqrt(mean(rbfresults.squerr));
    regtreemae = mean(regtreeresults.abserr);
    regtreermse = sqrt(mean(regtreeresults.squerr));

    % store mae and rmse (appended per repetition)
    modelresults.mae_mlp = vertcat(modelresults.mae_mlp, mlpmae);
    modelresults.rmse_mlp = vertcat(modelresults.rmse_mlp, mlprmse);
    modelresults.mae_svm = vertcat(modelresults.mae_svm, svmmae);
    modelresults.rmse_svm = vertcat(modelresults.rmse_svm, svmrmse);
    modelresults.mae_rbf = vertcat(modelresults.mae_rbf, rbfmae);
    modelresults.rmse_rbf = vertcat(modelresults.rmse_rbf, rbfrmse);
    modelresults.mae_regtree = vertcat(modelresults.mae_regtree, regtreemae);
    modelresults.rmse_regtree = vertcat(modelresults.rmse_regtree, regtreermse);
end

%% Mostly visualization stuff below
% There's an interesting "Handbook of Data Visualization"
% doi:10.1007/978-3-540-33037-0

% plot mae and rmse against j (trial run number, simply collected)
cd('~/.matlab')
e = figure;
clf;
set(axes, 'FontSize', 14);
plot(j_res,modelresults.mae_mlp,'-sk');
hold on;
plot(j_res,modelresults.mae_svm,'-ok');
plot(j_res,modelresults.mae_rbf,'-xk');
plot(j_res,modelresults.mae_regtree,'-dk');
legend('mae mlp','mae svm', 'mae rbf', 'mae regtree');
title(char(strcat(dataprefix, ' -- mean absolute error of models')));
xlabel('trial #');
ylabel('error');
savefig(char(strcat(dataprefix,'-mae-all')),'eps','jpeg', '-rgb', '-c0.1', '-r250');

e = figure;
clf;
set(axes, 'FontSize', 14);
plot(j_res,modelresults.rmse_mlp,'-sk');
hold on;
plot(j_res,modelresults.rmse_svm,'-ok')
plot(j_res,modelresults.rmse_rbf,'-xk')
plot(j_res,modelresults.rmse_regtree,'-dk');
legend('rmse mlp','rmse svm', 'rmse rbf', 'rmse regtree');
title(char(strcat(dataprefix, ' -- root mean squared error of models')));
xlabel('trial #');
ylabel('error');
savefig(char(strcat(dataprefix,'-rmse-all')),'eps','jpeg', '-rgb', '-c0.1', '-r250');

%% compare models' prediction against each other visually (plotmatrix)
% NOTE(review): svmresults/mlpresults/rbfresults/regtreeresults are
% re-initialized inside the loop, so everything below compares only the
% LAST repetition's predictions/errors — confirm this is intended.

% -----------------------------------------------
% first: the comparison of model predictions
% -----------------------------------------------
results_combined = svmresults.prediction;
results_combined = horzcat(results_combined, mlpresults.prediction);
results_combined = horzcat(results_combined, rbfresults.prediction);
results_combined = horzcat(results_combined, regtreeresults.prediction);
pm1 = figure;
clf;
labels = {'SVM prediction', 'MLP prediction', 'RBF prediction', 'RegTree prediction'};
[pm_results, ax, bigax, P] = plotmatrix(results_combined);
axes(bigax);
title('Prediction of Models, comparison');
% replace the diagonal histograms with text labels naming each model
delete(P);
for i = 1:4
    txtax = axes('Position', get(ax(i,i), 'Position'), 'units','normalized');
    text(.1, .5, labels{i});
    set(txtax, 'xtick', [], 'ytick', [], 'xgrid', 'off', 'ygrid', 'off', 'box', 'on');
end
savefig(char(strcat(dataprefix,'-comparison-prediction')),'eps','jpeg', '-rgb', '-c0.1', '-r250');

% -----------------------------------------------
% second: the comparison of model absolute errors
% -----------------------------------------------
labels = {'SVM abs err', 'MLP abs err', 'RBF abs err', 'RegTree abs err'};
abserr_combined = svmresults.abserr;
abserr_combined = horzcat(abserr_combined, mlpresults.abserr);
abserr_combined = horzcat(abserr_combined, rbfresults.abserr);
abserr_combined = horzcat(abserr_combined, regtreeresults.abserr);
pm2 = figure;
clf;
[pm_abserr, ax, bigax, P] = plotmatrix(abserr_combined);
axes(bigax);
title('Absolute Errors of Models, comparison');
delete(P);
for i = 1:4
    txtax = axes('Position', get(ax(i,i), 'Position'), 'units','normalized');
    text(.1, .5, labels{i});
    set(txtax, 'xtick', [], 'ytick', [], 'xgrid', 'off', 'ygrid', 'off', 'box', 'on');
end
savefig(char(strcat(dataprefix,'-comparison-abserr')),'eps','jpeg', '-rgb', '-c0.1', '-r250');

% store workspace for later use (saved as <dataprefix>.mat in ~/.matlab)
save(dataprefix);