Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #define MAX_DEPTH 5
- //#include "Files.h"
- #include <climits>
- #include <iostream>
- #include <vector>
- #include <bitset>
- #include <unordered_map>
- #include <chrono>
- #include <filesystem>
- int D, label, N = 0, dim = 0, feature;
- std::vector<int> labels, ftrs;
- std::vector<std::vector<int>> features;
- std::string line;
- inline size_t hash_vector(const std::vector<int> &vec) {
- std::size_t seed = vec.size();
- for (auto &i: vec) {
- seed ^= i + 0x9e3779b9 + (seed << 6) + (seed >> 2);
- }
- return seed;
- }
- std::unordered_map<size_t, int> dp[MAX_DEPTH];
- std::bitset<500> featuresAvailable;
- inline int makeSplit(int depth, const std::vector<int> &indices, const std::vector<int> &parent_indices) {
- int ans = INT_MAX;
- size_t hash_value = hash_vector(indices);
- if (dp[depth].find(hash_value) != dp[depth].end())
- return dp[depth][hash_value];
- if (depth == D - 1) { // D = 2 speed-up case
- for (int splitFeature = 0; splitFeature < dim; ++splitFeature)
- if (featuresAvailable[splitFeature]) {
- int c0_0 = 0, c0_1 = 0, c1_0 = 0, c1_1 = 0; // c1_0 -> negative labels with feature == 1
- for (const int &idx: indices) {
- const std::vector<int> &v = features[idx];
- if (v[splitFeature] == 0) {
- if (labels[idx] == 0)
- c0_0++;
- else
- c0_1++;
- } else {
- if (labels[idx] == 0)
- c1_0++;
- else
- c1_1++;
- }
- }
- int leftMissmatch = std::min(c0_0, c0_1);
- int rightMissmatch = std::min(c1_0, c1_1);
- ans = std::min(ans, leftMissmatch + rightMissmatch);
- }
- dp[depth][hash_value] = ans;
- return ans;
- } else {
- for (int splitFeature = 0; splitFeature < dim; ++splitFeature)
- if (featuresAvailable[splitFeature]) {
- featuresAvailable[splitFeature] = false;
- std::vector<int> negativeIndices{};
- std::vector<int> positiveIndices{};
- for (const int &idx: indices) {
- const std::vector<int> &v = features[idx];
- if (v[splitFeature] == 0)
- negativeIndices.emplace_back(idx);
- else
- positiveIndices.emplace_back(idx);
- }
- int leftMissmatch = 0, rightMissmatch = 0;
- leftMissmatch = makeSplit(depth + 1, negativeIndices, indices);
- if (depth == 0)
- rightMissmatch = makeSplit(depth + 1, positiveIndices, indices);
- else {
- size_t parent_hash_value = hash_vector(parent_indices);
- if (dp[depth - 1].find(parent_hash_value) == dp[depth - 1].end() ||
- leftMissmatch < dp[depth - 1][parent_hash_value])
- rightMissmatch = makeSplit(depth + 1, positiveIndices, indices);
- }
- ans = std::min(ans, leftMissmatch + rightMissmatch);
- featuresAvailable[splitFeature] = true;
- if (ans == 0)
- break;
- }
- }
- dp[depth][hash_value] = ans;
- return ans;
- }
- inline std::vector<int> parseInputSlowStringStream() {
- std::stringstream ss;
- std::getline(std::cin, line);
- ss << line;
- ss >> D;
- ss.clear();
- while (std::getline(std::cin, line)) {
- ss << line;
- ss >> label;
- labels.emplace_back(label);
- ftrs = std::vector<int>{};
- while (ss >> feature)
- ftrs.emplace_back(feature);
- features.emplace_back(ftrs);
- ss.clear();
- }
- N = features.size(); // # of datapoints
- dim = features[0].size();
- std::vector<int> indices(N);
- for (int i = 0; i < N; ++i)
- indices[i] = i;
- for (int i = 0; i < dim; ++i)
- featuresAvailable[i] = true;
- return indices;
- }
- inline void rwFrom(const char *read_filename, const char *write_filename) {
- // Speed-up reading
- std::ios::sync_with_stdio(false);
- std::cin.tie(NULL);
- freopen(read_filename, "r", stdin);
- freopen(write_filename, "w", stdout);
- }
- int main() {
- // std::cout << "we are currently in: " << std::filesystem::current_path() << '\n';
- //
- // auto start = std::chrono::steady_clock::now();
- //
- // rwFrom(Files::mushroom_in, Files::mushroom_out);
- std::vector<int> indices = parseInputSlowStringStream();
- std::cout << makeSplit(0, indices, indices) << '\n'; // Solve
- // auto end = std::chrono::steady_clock::now();
- // std::cout << "Elapsed time: "
- // << std::chrono::duration_cast<std::chrono::seconds>(end - start).count()
- // << " sec";
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement