/*
File: main.cpp
To compile on linprog4.cs.fsu.edu: g++47 -o main.exe main.cpp -std=c++11 -O3 -Wall -Wextra -Werror -I. -I/usr/local/include/ -I/usr/local/include/boost_1.53.0/ -L/usr/local/lib64/ -lopencv_calib3d -lopencv_contrib -lopencv_core -lopencv_features2d -lopencv_flann -lopencv_gpu -lopencv_highgui -lopencv_imgproc -lopencv_legacy -lopencv_ml -lopencv_nonfree -lopencv_objdetect -lopencv_photo -lopencv_stitching -lopencv_ts -lopencv_video -lopencv_videostab -Wl,-rpath,/usr/local/lib64/ && ./main.exe
*/

#include <iostream>
#include <string>
#include <vector>
#include <opencv2/opencv.hpp>

int main () {

	std::string filename = "wdbc.data.txt";
	int const data_class_col = 1;
	std::vector<int> irrelevant_col_indices;
	irrelevant_col_indices.push_back( 0 );

	CvMLData ml_data;
	ml_data.read_csv( filename.c_str() );
	ml_data.set_response_idx( data_class_col );
	auto irrelevant_col_indices_i = std::begin( irrelevant_col_indices );
	auto irrelevant_col_indices_end = std::end( irrelevant_col_indices );
	while ( irrelevant_col_indices_i != irrelevant_col_indices_end ) {
		auto irrelevant_col = *irrelevant_col_indices_i;
		ml_data.change_var_idx( irrelevant_col, false );
		++irrelevant_col_indices_i;
	}
	auto class_labels = ml_data.get_class_labels_map();
	auto class_labels_i = std::begin( class_labels );
	auto class_labels_end = std::end( class_labels );
	int number_of_classes = 0;
	while ( class_labels_i != class_labels_end ) {
		std::cout << class_labels_i->first << " : " << class_labels_i->second << '\n';
		++class_labels_i;
		++number_of_classes;
	}
	std::cout << "number_of_classes: " << number_of_classes << '\n';

	float const train_sample_percentage = 0.90f;
	bool const should_mix = true;
	CvTrainTestSplit split( train_sample_percentage, should_mix );

	ml_data.set_train_test_split( &split );

	cv::Mat_<int> train_indices( ml_data.get_train_sample_idx() );
	std::cout << "train_indices( rows: " << train_indices.rows << ", cols: " << train_indices.cols << " ):\n" << train_indices << '\n';
	int const train_indices_row = 0;

	cv::Mat_<int> test_indices( ml_data.get_test_sample_idx() );
	std::cout << "test_indices( rows: " << test_indices.rows << ", cols: " << test_indices.cols << " ):\n" << test_indices << '\n';
	int const test_indices_row = 0;

	cv::Mat_<int> col_indices( ml_data.get_var_idx() );
	std::cout << "col_indices( rows: " << col_indices.rows << ", cols: " << col_indices.cols << " ):\n" << col_indices << '\n';
	int const col_indices_row = 0;

	cv::Mat_<float> data( ml_data.get_values() );

	cv::Mat_<float> training_vectors( train_indices.cols, col_indices.cols );

	int training_vectors_row = 0;
	for ( int train_indices_col = 0; train_indices_col < train_indices.cols; ++train_indices_col ) {
		int training_vectors_col = 0;
		auto data_row = train_indices( train_indices_row, train_indices_col );
		for ( int col_indices_col = 0; col_indices_col < col_indices.cols; ++col_indices_col ) {
			auto data_col = col_indices( col_indices_row, col_indices_col );
			auto data_value = data( data_row, data_col );
			training_vectors( training_vectors_row, training_vectors_col ) = data_value;
			++training_vectors_col;
		}
		++training_vectors_row;
	}

	std::cout << "training_vectors( rows: " << training_vectors.rows << ", cols: " << training_vectors.cols << " ):\n" << training_vectors << '\n';


	int const training_classes_cols = 1;
	cv::Mat_<float> training_classes( train_indices.cols, training_classes_cols );
	int const training_classes_col = 0;

	int training_classes_row = 0;
	for ( int train_indices_col = 0; train_indices_col < train_indices.cols; ++train_indices_col ) {
		auto data_row = train_indices( train_indices_row, train_indices_col );
		auto data_value = data( data_row, data_class_col );
		training_classes( training_classes_row, training_classes_col ) = data_value;
		++training_classes_row;
	}

	std::cout << "training_classes( rows: " << training_classes.rows << ", cols: " << training_classes.cols << " ):\n" << training_classes << '\n';


	cv::Mat_<float> testing_vectors( test_indices.cols, col_indices.cols );

	int testing_vectors_row = 0;
	for ( int test_indices_col = 0; test_indices_col < test_indices.cols; ++test_indices_col ) {
		int testing_vectors_col = 0;
		auto data_row = test_indices( test_indices_row, test_indices_col );
		for ( int col_indices_col = 0; col_indices_col < col_indices.cols; ++col_indices_col ) {
			auto data_col = col_indices( col_indices_row, col_indices_col );
			auto data_value = data( data_row, data_col );
			testing_vectors( testing_vectors_row, testing_vectors_col ) = data_value;
			++testing_vectors_col;
		}
		++testing_vectors_row;
	}

	std::cout << "testing_vectors( rows: " << testing_vectors.rows << ", cols: " << testing_vectors.cols << " ):\n" << testing_vectors << '\n';


	int const testing_classes_cols = 1;
	cv::Mat_<float> testing_classes( test_indices.cols, testing_classes_cols );
	int const testing_classes_col = 0;

	int testing_classes_row = 0;
	for ( int test_indices_col = 0; test_indices_col < test_indices.cols; ++test_indices_col ) {
		auto data_row = test_indices( test_indices_row, test_indices_col );
		auto data_value = data( data_row, data_class_col );
		testing_classes( testing_classes_row, testing_classes_col ) = data_value;
		++testing_classes_row;
	}

	std::cout << "testing_classes( rows: " << testing_classes.rows << ", cols: " << testing_classes.cols << " ):\n" << testing_classes << '\n';


	int const k = 3;
	CvKNearest knn;
	knn.train( training_vectors, training_classes );
	int const testing_classes_predictions_cols = 1;
	cv::Mat_<float> testing_classes_predictions( testing_vectors.rows, testing_classes_predictions_cols );
	cv::Mat_<float> neighbor_classes( testing_vectors.rows, k );
	cv::Mat_<float> distances( testing_vectors.rows, k );
	knn.find_nearest( testing_vectors, k, testing_classes_predictions, neighbor_classes, distances );

	std::cout << "testing_classes_predictions( rows: " << testing_classes_predictions.rows << ", cols: " << testing_classes_predictions.cols << " ):\n" << testing_classes_predictions << '\n';
	std::cout << "neighbor_classes( rows: " << neighbor_classes.rows << ", cols: " << neighbor_classes.cols << " ):\n" << neighbor_classes << '\n';
	std::cout << "distances( rows: " << distances.rows << ", cols: " << distances.cols << " ):\n" << distances << '\n';

}