/* File: main.cpp To compile on linprog4.cs.fsu.edu: g++47 -o main.exe main.cpp -std=c++11 -O3 -Wall -Wextra -Werror -I. -I/usr/local/include/ -I/usr/local/include/boost_1.53.0/ -L/usr/local/lib64/ -lopencv_calib3d -lopencv_contrib -lopencv_core -lopencv_features2d -lopencv_flann -lopencv_gpu -lopencv_highgui -lopencv_imgproc -lopencv_legacy -lopencv_ml -lopencv_nonfree -lopencv_objdetect -lopencv_photo -lopencv_stitching -lopencv_ts -lopencv_video -lopencv_videostab -Wl,-rpath,/usr/local/lib64/ && ./main.exe */ #include #include #include #include int main () { std::string filename = "wdbc.data.txt"; int const data_class_col = 1; std::vector irrelevant_col_indices; irrelevant_col_indices.push_back( 0 ); CvMLData ml_data; ml_data.read_csv( filename.c_str() ); ml_data.set_response_idx( data_class_col ); auto irrelevant_col_indices_i = std::begin( irrelevant_col_indices ); auto irrelevant_col_indices_end = std::end( irrelevant_col_indices ); while ( irrelevant_col_indices_i != irrelevant_col_indices_end ) { auto irrelevant_col = *irrelevant_col_indices_i; ml_data.change_var_idx( irrelevant_col, false ); ++irrelevant_col_indices_i; } auto class_labels = ml_data.get_class_labels_map(); auto class_labels_i = std::begin( class_labels ); auto class_labels_end = std::end( class_labels ); int number_of_classes = 0; while ( class_labels_i != class_labels_end ) { std::cout << class_labels_i->first << " : " << class_labels_i->second << '\n'; ++class_labels_i; ++number_of_classes; } std::cout << "number_of_classes: " << number_of_classes << '\n'; float const train_sample_percentage = 0.90f; bool const should_mix = true; CvTrainTestSplit split( train_sample_percentage, should_mix ); ml_data.set_train_test_split( &split ); cv::Mat_ train_indices( ml_data.get_train_sample_idx() ); std::cout << "train_indices( rows: " << train_indices.rows << ", cols: " << train_indices.cols << " ):\n" << train_indices << '\n'; int const train_indices_row = 0; cv::Mat_ test_indices( ml_data.get_test_sample_idx() ); std::cout << "test_indices( rows: " << test_indices.rows << ", cols: " << test_indices.cols << " ):\n" << test_indices << '\n'; int const test_indices_row = 0; cv::Mat_ col_indices( ml_data.get_var_idx() ); std::cout << "col_indices( rows: " << col_indices.rows << ", cols: " << col_indices.cols << " ):\n" << col_indices << '\n'; int const col_indices_row = 0; cv::Mat_ data( ml_data.get_values() ); cv::Mat_ training_vectors( train_indices.cols, col_indices.cols ); int training_vectors_row = 0; for ( int train_indices_col = 0; train_indices_col < train_indices.cols; ++train_indices_col ) { int training_vectors_col = 0; auto data_row = train_indices( train_indices_row, train_indices_col ); for ( int col_indices_col = 0; col_indices_col < col_indices.cols; ++col_indices_col ) { auto data_col = col_indices( col_indices_row, col_indices_col ); auto data_value = data( data_row, data_col ); training_vectors( training_vectors_row, training_vectors_col ) = data_value; ++training_vectors_col; } ++training_vectors_row; } std::cout << "training_vectors( rows: " << training_vectors.rows << ", cols: " << training_vectors.cols << " ):\n" << training_vectors << '\n'; int const training_classes_cols = 1; cv::Mat_ training_classes( train_indices.cols, training_classes_cols ); int const training_classes_col = 0; int training_classes_row = 0; for ( int train_indices_col = 0; train_indices_col < train_indices.cols; ++train_indices_col ) { auto data_row = train_indices( train_indices_row, train_indices_col ); auto data_value = data( data_row, data_class_col ); training_classes( training_classes_row, training_classes_col ) = data_value; ++training_classes_row; } std::cout << "training_classes( rows: " << training_classes.rows << ", cols: " << training_classes.cols << " ):\n" << training_classes << '\n'; cv::Mat_ testing_vectors( test_indices.cols, col_indices.cols ); int testing_vectors_row = 0; for ( int test_indices_col = 0; test_indices_col < test_indices.cols; ++test_indices_col ) { int testing_vectors_col = 0; auto data_row = test_indices( test_indices_row, test_indices_col ); for ( int col_indices_col = 0; col_indices_col < col_indices.cols; ++col_indices_col ) { auto data_col = col_indices( col_indices_row, col_indices_col ); auto data_value = data( data_row, data_col ); testing_vectors( testing_vectors_row, testing_vectors_col ) = data_value; ++testing_vectors_col; } ++testing_vectors_row; } std::cout << "testing_vectors( rows: " << testing_vectors.rows << ", cols: " << testing_vectors.cols << " ):\n" << testing_vectors << '\n'; int const testing_classes_cols = 1; cv::Mat_ testing_classes( test_indices.cols, testing_classes_cols ); int const testing_classes_col = 0; int testing_classes_row = 0; for ( int test_indices_col = 0; test_indices_col < test_indices.cols; ++test_indices_col ) { auto data_row = test_indices( test_indices_row, test_indices_col ); auto data_value = data( data_row, data_class_col ); testing_classes( testing_classes_row, testing_classes_col ) = data_value; ++testing_classes_row; } std::cout << "testing_classes( rows: " << testing_classes.rows << ", cols: " << testing_classes.cols << " ):\n" << testing_classes << '\n'; int const k = 3; CvKNearest knn; knn.train( training_vectors, training_classes ); int const testing_classes_predictions_cols = 1; cv::Mat_ testing_classes_predictions( testing_vectors.rows, testing_classes_predictions_cols ); cv::Mat_ neighbor_classes( testing_vectors.rows, k ); cv::Mat_ distances( testing_vectors.rows, k ); knn.find_nearest( testing_vectors, k, testing_classes_predictions, neighbor_classes, distances ); std::cout << "testing_classes_predictions( rows: " << testing_classes_predictions.rows << ", cols: " << testing_classes_predictions.cols << " ):\n" << testing_classes_predictions << '\n'; std::cout << "neighbor_classes( rows: " << neighbor_classes.rows << ", cols: " << neighbor_classes.cols << " ):\n" << neighbor_classes << '\n'; std::cout << "distances( rows: " << distances.rows << ", cols: " << distances.cols << " ):\n" << distances << '\n'; }