12b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// This file is part of Eigen, a lightweight C++ template library 22b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// for linear algebra. 32b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// 42b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// Copyright (C) 2015 Benoit Jacob <benoitjacob@google.com> 52b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// 62b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// This Source Code Form is subject to the terms of the Mozilla 72b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// Public License v. 2.0. If a copy of the MPL was not distributed 82b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 92b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 102b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#include <iostream> 112b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#include <cstdint> 122b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#include <cstdlib> 132b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#include <vector> 142b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#include <algorithm> 152b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#include <fstream> 162b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#include <string> 172b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#include <cmath> 182b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#include <cassert> 192b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#include <cstring> 202b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#include <memory> 212b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 222b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang#include <Eigen/Core> 232b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 242b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangusing namespace std; 252b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 262b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangconst int default_precision = 4; 
// see --only-cubic-sizes
bool only_cubic_sizes = false;

// see --dump-tables
bool dump_tables = false;

// Returns floor(log2(x)); for a power of two ("pot") this is its exponent.
// Both 0 and 1 map to 0.
uint8_t log2_pot(size_t x) {
  uint8_t exponent = 0;
  for (x >>= 1; x != 0; x >>= 1) {
    ++exponent;
  }
  return exponent;
}

// Packs the log2 of each of the three sizes into one 16-bit value laid out
// as 0xKMN (one hex digit per dimension).
uint16_t compact_size_triple(size_t k, size_t m, size_t n)
{
  const unsigned packed =
    (static_cast<unsigned>(log2_pot(k)) << 8) |
    (static_cast<unsigned>(log2_pot(m)) << 4) |
    static_cast<unsigned>(log2_pot(n));
  return static_cast<uint16_t>(packed);
}

// just a helper to store a triple of K,M,N sizes for matrix product
//
// Copy construction/assignment are left to the compiler: the members are
// plain integers, so the implicit versions are exactly right.
struct size_triple_t
{
  uint16_t k, m, n;

  size_triple_t() : k(0), m(0), n(0) {}

  // Stores the three sizes; each one is narrowed to 16 bits.
  size_triple_t(size_t _k, size_t _m, size_t _n)
    : k(static_cast<uint16_t>(_k))
    , m(static_cast<uint16_t>(_m))
    , n(static_cast<uint16_t>(_n))
  {}

  // Expands a compact 0xKMN value (see compact_size_triple) back into
  // power-of-two sizes.
  size_triple_t(uint16_t compact)
    : k(static_cast<uint16_t>(1u << ((compact >> 8) & 0xf)))
    , m(static_cast<uint16_t>(1u << ((compact >> 4) & 0xf)))
    , n(static_cast<uint16_t>(1u << (compact & 0xf)))
  {}

  bool is_cubic() const { return k == m && m == n; }
};

// Prints a triple as "(k, m, n)".
std::ostream& operator<<(std::ostream& s, const size_triple_t& t)
{
  s << "(" << t.k << ", " << t.m << ", " << t.n << ")";
  return s;
}

// One measurement line of an input file.
struct inputfile_entry_t
{
  uint16_t product_size;            // compact 0xKMN product size
  uint16_t pot_block_size;          // compact block size; 0 for "default sizes" files
  size_triple_t nonpot_block_size;  // only filled in for "default sizes" files
  float gflops;
};

// Loads a measurements file. Everything before the "BEGIN MEASUREMENTS ..."
// marker is ignored; the marker selects the line format of what follows.
// Any problem with the file is reported on stderr and terminates the
// process with exit(1).
struct inputfile_t
{
  enum class type_t {
    unknown,
    all_pot_sizes,
    default_sizes
  };

  std::string filename;
  std::vector<inputfile_entry_t> entries;
  type_t type;

  inputfile_t(const std::string& fname)
    : filename(fname)
    , type(type_t::unknown)
  {
    std::ifstream stream(filename);
    if (!stream.is_open()) {
      std::cerr << "couldn't open input file: " << filename << std::endl;
      std::exit(1);
    }
    std::string line;
    while (std::getline(stream, line)) {
      if (line.empty()) {
        continue;
      }
      if (line.find("BEGIN MEASUREMENTS ALL POT SIZES") == 0) {
        begin_section(type_t::all_pot_sizes);
        continue;
      }
      if (line.find("BEGIN MEASUREMENTS DEFAULT SIZES") == 0) {
        begin_section(type_t::default_sizes);
        continue;
      }
      switch (type) {
        case type_t::all_pot_sizes:
          parse_all_pot_sizes_line(line);
          break;
        case type_t::default_sizes:
          parse_default_sizes_line(line);
          break;
        case type_t::unknown:
          // Still in the preamble that precedes the BEGIN marker.
          break;
      }
    }
    if (type == type_t::unknown) {
      std::cerr << "Unrecognized input file " << filename << std::endl;
      std::exit(1);
    }
    if (entries.empty()) {
      std::cerr << "didn't find any measurements in input file: " << filename << std::endl;
      std::exit(1);
    }
  }

private:
  // Records which kind of section starts here; a file may contain only one.
  void begin_section(type_t section)
  {
    if (type != type_t::unknown) {
      std::cerr << "Input file " << filename
                << " contains redundant BEGIN MEASUREMENTS lines" << std::endl;
      std::exit(1);
    }
    type = section;
  }

  // Reports a line that does not match the expected format and exits.
  [[noreturn]] void fail_ill_formed(const std::string& line) const
  {
    std::cerr << "ill-formed input file: " << filename << std::endl;
    std::cerr << "offending line:" << std::endl << line << std::endl;
    std::exit(1);
  }

  // True when the line should be dropped because of --only-cubic-sizes.
  static bool filtered_out(unsigned int product_size)
  {
    return only_cubic_sizes &&
           !size_triple_t(static_cast<uint16_t>(product_size)).is_cubic();
  }

  // Parses "<product size, hex> <block size, hex> <gflops>".
  void parse_all_pot_sizes_line(const std::string& line)
  {
    unsigned int product_size = 0;
    unsigned int block_size = 0;
    float gflops = 0.0f;
    const int fields = sscanf(line.c_str(), "%x %x %f",
                              &product_size, &block_size, &gflops);
    const bool well_formed =
      fields == 3 &&
      product_size >= 1 && product_size <= 0xfff &&
      block_size >= 1 && block_size <= 0xfff &&
      std::isfinite(gflops);
    if (!well_formed) {
      fail_ill_formed(line);
    }
    if (filtered_out(product_size)) {
      return;
    }
    inputfile_entry_t entry;
    entry.product_size = static_cast<uint16_t>(product_size);
    entry.pot_block_size = static_cast<uint16_t>(block_size);
    entry.gflops = gflops;
    entries.push_back(entry);
  }

  // Parses "<product size, hex> default(<bk>, <bm>, <bn>) <gflops>".
  void parse_default_sizes_line(const std::string& line)
  {
    unsigned int product_size = 0;
    int bk = 0, bm = 0, bn = 0;
    float gflops = 0.0f;
    const int fields = sscanf(line.c_str(), "%x default(%d, %d, %d) %f",
                              &product_size, &bk, &bm, &bn, &gflops);
    // The block sizes end up in 16-bit fields; refuse values that would
    // silently wrap around when stored.
    const bool block_sizes_fit =
      bk >= 0 && bk <= 0xffff &&
      bm >= 0 && bm <= 0xffff &&
      bn >= 0 && bn <= 0xffff;
    const bool well_formed =
      fields == 5 &&
      product_size >= 1 && product_size <= 0xfff &&
      block_sizes_fit &&
      std::isfinite(gflops);
    if (!well_formed) {
      fail_ill_formed(line);
    }
    if (filtered_out(product_size)) {
      return;
    }
    inputfile_entry_t entry;
    entry.product_size = static_cast<uint16_t>(product_size);
    entry.pot_block_size = 0;
    entry.nonpot_block_size = size_triple_t(static_cast<size_t>(bk),
                                            static_cast<size_t>(bm),
                                            static_cast<size_t>(bn));
    entry.gflops = gflops;
    entries.push_back(entry);
  }
};

// A measurement whose speed is expressed relative to the best speed
// measured for the same product size within the same file.
struct preprocessed_inputfile_entry_t
{
  uint16_t product_size;
  uint16_t block_size;

  float efficiency;
};

// Strict ordering of entries by efficiency.
bool lower_efficiency(const preprocessed_inputfile_entry_t& e1, const preprocessed_inputfile_entry_t& e2)
{
  return e1.efficiency < e2.efficiency;
}

// Converts the raw GFlops of an "all pot sizes" file into efficiencies.
// Entries of one product size must form a single contiguous run.
struct preprocessed_inputfile_t
{
  std::string filename;
  std::vector<preprocessed_inputfile_entry_t> entries;

  preprocessed_inputfile_t(const inputfile_t& inputfile)
    : filename(inputfile.filename)
  {
    if (inputfile.type != inputfile_t::type_t::all_pot_sizes) {
      std::cerr << "Can't preprocess " << filename
                << ": not an ALL POT SIZES measurements file" << std::endl;
      std::abort();
    }
    // One flag per possible 16-bit product size, set once its run was imported.
    std::vector<char> already_imported(1u << 16, 0);
    const auto end = inputfile.entries.end();
    auto run_begin = inputfile.entries.begin();
    while (run_begin != end) {
      const uint16_t product_size = run_begin->product_size;
      if (already_imported[product_size]) {
        std::cerr << "Unexpected ordering of entries in " << filename << std::endl;
        std::cerr << "(Expected all entries for product size " << std::hex
                  << product_size << std::dec << " to be grouped)" << std::endl;
        std::exit(1);
      }
      already_imported[product_size] = 1;
      auto run_end = run_begin;
      do {
        ++run_end;
      } while (run_end != end && run_end->product_size == product_size);
      import_input_file_range_one_product_size(run_begin, run_end);
      run_begin = run_end;
    }
  }

private:
  // Appends the entries of [begin, end) -- a non-empty run sharing one
  // product size -- with efficiency = gflops / (best gflops of the run).
  void import_input_file_range_one_product_size(
    const std::vector<inputfile_entry_t>::const_iterator& begin,
    const std::vector<inputfile_entry_t>::const_iterator& end)
  {
    float max_gflops = 0.0f;
    for (auto it = begin; it != end; ++it) {
      max_gflops = std::max(max_gflops, it->gflops);
    }
    // Without a positive maximum the division below would produce NaN/inf,
    // which the min/max reductions done later would not notice.
    if (!(max_gflops > 0.0f)) {
      std::cerr << "No positive GFlops measurement for product size " << std::hex
                << begin->product_size << std::dec << " in " << filename << std::endl;
      std::exit(1);
    }
    for (auto it = begin; it != end; ++it) {
      preprocessed_inputfile_entry_t entry;
      entry.product_size = it->product_size;
      entry.block_size = it->pot_block_size;
      entry.efficiency = it->gflops / max_gflops;
      entries.push_back(entry);
    }
  }
};

// Verifies that every file lists the same (product size, block size) pairs
// in the same order as the first file; exits with an error otherwise.
void check_all_files_in_same_exact_order(
       const std::vector<preprocessed_inputfile_t>& preprocessed_inputfiles)
{
  if (preprocessed_inputfiles.empty()) {
    return;
  }

  const preprocessed_inputfile_t& first_file = preprocessed_inputfiles.front();
  const size_t num_entries = first_file.entries.size();

  for (const preprocessed_inputfile_t& file : preprocessed_inputfiles) {
    if (file.entries.size() != num_entries) {
      std::cerr << "these files have different number of entries: "
                << file.filename
                << " and "
                << first_file.filename
                << std::endl;
      std::exit(1);
    }
  }

  for (size_t entry_index = 0; entry_index < num_entries; entry_index++) {
    const preprocessed_inputfile_entry_t& expected = first_file.entries[entry_index];
    for (const preprocessed_inputfile_t& cur_file : preprocessed_inputfiles) {
      const preprocessed_inputfile_entry_t& actual = cur_file.entries[entry_index];
      if (actual.product_size != expected.product_size ||
          actual.block_size != expected.block_size)
      {
        std::cerr << "entries not in same order between these files: "
                  << first_file.filename
                  << " and "
                  << cur_file.filename
                  << std::endl;
        std::exit(1);
      }
    }
  }
}

// Lowest efficiency of entry `entry_index` among the files selected by
// `subset`, capped at 1.
static float min_efficiency_over_subset(
  const std::vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
  const std::vector<size_t>& subset,
  size_t entry_index)
{
  float lowest = 1.0f;
  for (const size_t file_index : subset) {
    lowest = std::min(lowest, preprocessed_inputfiles[file_index].entries[entry_index].efficiency);
  }
  return lowest;
}

// Index one past the run of equal product sizes that starts at `first`.
static size_t end_of_product_size_run(
  const std::vector<preprocessed_inputfile_entry_t>& entries,
  size_t first)
{
  size_t last = first + 1;
  while (last < entries.size() && entries[last].product_size == entries[first].product_size) {
    ++last;
  }
  return last;
}

// Efficiency reachable when all files of `subset` must share one block size
// per product size: for each product size take the best entry, an entry
// being as good as its worst file; the result is the minimum over all
// product sizes. Subsets of fewer than two files are fully efficient.
float efficiency_of_subset(
        const std::vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
        const std::vector<size_t>& subset)
{
  if (subset.size() <= 1) {
    return 1.0f;
  }
  const std::vector<preprocessed_inputfile_entry_t>& reference_entries =
    preprocessed_inputfiles[subset[0]].entries;
  float efficiency = 1.0f;
  size_t run_begin = 0;
  while (run_begin < reference_entries.size()) {
    const size_t run_end = end_of_product_size_run(reference_entries, run_begin);
    float efficiency_this_product_size = 0.0f;
    for (size_t e = run_begin; e < run_end; e++) {
      efficiency_this_product_size =
        std::max(efficiency_this_product_size,
                 min_efficiency_over_subset(preprocessed_inputfiles, subset, e));
    }
    efficiency = std::min(efficiency, efficiency_this_product_size);
    run_begin = run_end;
  }
  return efficiency;
}

// Prints to stdout the source of a `LookupTable` struct holding, for each
// product size, the block size that is best for the files of `subset`.
// Aborts when the table cannot be generated.
void dump_table_for_subset(
        const std::vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
        const std::vector<size_t>& subset)
{
  if (subset.empty() || preprocessed_inputfiles[subset[0]].entries.empty()) {
    std::cerr << "Can't generate tables without any measurements." << std::endl;
    std::abort();
  }
  const preprocessed_inputfile_t& first_file = preprocessed_inputfiles[subset[0]];
  const std::vector<preprocessed_inputfile_entry_t>& reference_entries = first_file.entries;
  const size_t num_entries = reference_entries.size();

  const size_triple_t min_product_size(reference_entries.front().product_size);
  const size_triple_t max_product_size(reference_entries.back().product_size);
  if (!min_product_size.is_cubic() || !max_product_size.is_cubic()) {
    std::cerr << "Can't generate tables: first and last product sizes must be cubic." << std::endl;
    std::abort();
  }
  if (only_cubic_sizes) {
    std::cerr << "Can't generate tables with --only-cubic-sizes." << std::endl;
    std::abort();
  }

  std::cout << "struct LookupTable {" << std::endl;
  std::cout << " static const size_t BaseSize = " << min_product_size.k << ";" << std::endl;
  const size_t NumSizes = log2_pot(max_product_size.k / min_product_size.k) + 1;
  const size_t TableSize = NumSizes * NumSizes * NumSizes;
  std::cout << " static const size_t NumSizes = " << NumSizes << ";" << std::endl;
  std::cout << " static const unsigned short* Data() {" << std::endl;
  std::cout << " static const unsigned short data[" << TableSize << "] = {";

  size_t values_written = 0;
  size_t run_begin = 0;
  while (run_begin < num_entries) {
    const size_t run_end = end_of_product_size_run(reference_entries, run_begin);

    float best_efficiency_this_product_size = 0.0f;
    uint16_t best_block_size_this_product_size = 0;
    for (size_t e = run_begin; e < run_end; e++) {
      const float efficiency_this_entry =
        min_efficiency_over_subset(preprocessed_inputfiles, subset, e);
      if (efficiency_this_entry > best_efficiency_this_product_size) {
        best_efficiency_this_product_size = efficiency_this_entry;
        best_block_size_this_product_size = reference_entries[e].block_size;
      }
    }

    // Start a new output row every NumSizes values.
    if (values_written % NumSizes) {
      std::cout << " ";
    } else {
      std::cout << std::endl << " ";
    }
    ++values_written;
    std::cout << "0x" << std::hex << best_block_size_this_product_size << std::dec;
    if (run_end < num_entries) {
      std::cout << ",";
    }
    run_begin = run_end;
  }

  if (values_written != TableSize) {
    std::cerr << std::endl << "Wrote " << values_written << " table entries, expected " << TableSize << std::endl;
    std::abort();
  }
  std::cout << std::endl << " };" << std::endl;
  std::cout << " return data;" << std::endl;
  std::cout << " }" << std::endl;
  std::cout << "};" << std::endl;
}

// Efficiency of a partition: that of its least efficient subset.
float efficiency_of_partition(
        const std::vector<preprocessed_inputfile_t>& preprocessed_inputfiles,
        const std::vector<std::vector<size_t>>& partition)
{
  float efficiency = 1.0f;
  for (const std::vector<size_t>& subset : partition) {
    efficiency = std::min(efficiency, efficiency_of_subset(preprocessed_inputfiles, subset));
  }
  return efficiency;
}

// Fills out_subset with {0, 1, ..., subset_size - 1}. set_size is only used
// to assert that such a subset exists.
void make_first_subset(size_t subset_size, std::vector<size_t>& out_subset, size_t set_size)
{
  assert(subset_size >= 1 && subset_size <= set_size);
  out_subset.resize(subset_size);
  size_t next_value = 0;
  for (size_t& element : out_subset) {
    element = next_value++;
  }
}

4202b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangbool is_last_subset(const vector<size_t>& subset, size_t set_size) 4212b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 4222b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return subset[0] == set_size - subset.size(); 4232b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 4242b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 4252b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangvoid next_subset(vector<size_t>& inout_subset, size_t set_size) 4262b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 4272b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if (is_last_subset(inout_subset, set_size)) { 4282b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang cerr << "iterating past the last subset" << endl; 4292b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang abort(); 4302b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 4312b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang size_t i = 1; 4322b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang while (inout_subset[inout_subset.size() - i] == set_size - i) { 4332b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang i++; 4342b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang assert(i <= inout_subset.size()); 4352b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 4362b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang size_t first_index_to_change = inout_subset.size() - i; 4372b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang inout_subset[first_index_to_change]++; 4382b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang size_t p = inout_subset[first_index_to_change]; 4392b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang for (size_t j = first_index_to_change + 1; j < inout_subset.size(); j++) { 4402b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang inout_subset[j] = ++p; 4412b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 4422b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 4432b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 4442b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangconst size_t 
number_of_subsets_limit = 100; 4452b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangconst size_t always_search_subsets_of_size_at_least = 2; 4462b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 4472b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangbool is_number_of_subsets_feasible(size_t n, size_t p) 4482b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 4492b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang assert(n>0 && p>0 && p<=n); 4502b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang uint64_t numerator = 1, denominator = 1; 4512b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang for (size_t i = 0; i < p; i++) { 4522b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang numerator *= n - i; 4532b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang denominator *= i + 1; 4542b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if (numerator > denominator * number_of_subsets_limit) { 4552b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return false; 4562b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 4572b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 4582b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return true; 4592b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 4602b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 4612b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangsize_t max_feasible_subset_size(size_t n) 4622b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 4632b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang assert(n > 0); 4642b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang const size_t minresult = min<size_t>(n-1, always_search_subsets_of_size_at_least); 4652b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang for (size_t p = 1; p <= n - 1; p++) { 4662b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if (!is_number_of_subsets_feasible(n, p+1)) { 4672b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return max(p, minresult); 4682b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 4692b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 
4702b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return n - 1; 4712b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 4722b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 4732b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangvoid find_subset_with_efficiency_higher_than( 4742b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang const vector<preprocessed_inputfile_t>& preprocessed_inputfiles, 4752b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang float required_efficiency_to_beat, 4762b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang vector<size_t>& inout_remainder, 4772b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang vector<size_t>& out_subset) 4782b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 4792b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang out_subset.resize(0); 4802b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 4812b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if (required_efficiency_to_beat >= 1.0f) { 4822b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang cerr << "can't beat efficiency 1." 
<< endl; 4832b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang abort(); 4842b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 4852b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 4862b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang while (!inout_remainder.empty()) { 4872b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 4882b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang vector<size_t> candidate_indices(inout_remainder.size()); 4892b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang for (size_t i = 0; i < candidate_indices.size(); i++) { 4902b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang candidate_indices[i] = i; 4912b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 4922b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 4932b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang size_t candidate_indices_subset_size = max_feasible_subset_size(candidate_indices.size()); 4942b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang while (candidate_indices_subset_size >= 1) { 4952b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang vector<size_t> candidate_indices_subset; 4962b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang make_first_subset(candidate_indices_subset_size, 4972b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang candidate_indices_subset, 4982b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang candidate_indices.size()); 4992b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 5002b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang vector<size_t> best_candidate_indices_subset; 5012b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang float best_efficiency = 0.0f; 5022b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang vector<size_t> trial_subset = out_subset; 5032b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang trial_subset.resize(out_subset.size() + candidate_indices_subset_size); 5042b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang while (true) 5052b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang { 5062b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang for (size_t i = 0; i < 
candidate_indices_subset_size; i++) { 5072b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang trial_subset[out_subset.size() + i] = inout_remainder[candidate_indices_subset[i]]; 5082b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 5092b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 5102b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang float trial_efficiency = efficiency_of_subset(preprocessed_inputfiles, trial_subset); 5112b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if (trial_efficiency > best_efficiency) { 5122b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang best_efficiency = trial_efficiency; 5132b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang best_candidate_indices_subset = candidate_indices_subset; 5142b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 5152b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if (is_last_subset(candidate_indices_subset, candidate_indices.size())) { 5162b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang break; 5172b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 5182b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang next_subset(candidate_indices_subset, candidate_indices.size()); 5192b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 5202b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 5212b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if (best_efficiency > required_efficiency_to_beat) { 5222b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang for (size_t i = 0; i < best_candidate_indices_subset.size(); i++) { 5232b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang candidate_indices[i] = candidate_indices[best_candidate_indices_subset[i]]; 5242b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 5252b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang candidate_indices.resize(best_candidate_indices_subset.size()); 5262b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 5272b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang candidate_indices_subset_size--; 5282b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 
5292b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 5302b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang size_t candidate_index = candidate_indices[0]; 5312b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang auto candidate_iterator = inout_remainder.begin() + candidate_index; 5322b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang vector<size_t> trial_subset = out_subset; 5332b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 5342b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang trial_subset.push_back(*candidate_iterator); 5352b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang float trial_efficiency = efficiency_of_subset(preprocessed_inputfiles, trial_subset); 5362b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if (trial_efficiency > required_efficiency_to_beat) { 5372b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang out_subset.push_back(*candidate_iterator); 5382b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang inout_remainder.erase(candidate_iterator); 5392b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } else { 5402b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang break; 5412b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 5422b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 5432b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 5442b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 5452b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangvoid find_partition_with_efficiency_higher_than( 5462b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang const vector<preprocessed_inputfile_t>& preprocessed_inputfiles, 5472b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang float required_efficiency_to_beat, 5482b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang vector<vector<size_t>>& out_partition) 5492b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 5502b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang out_partition.resize(0); 5512b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 5522b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang vector<size_t> remainder; 
5532b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang for (size_t i = 0; i < preprocessed_inputfiles.size(); i++) { 5542b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang remainder.push_back(i); 5552b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 5562b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 5572b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang while (!remainder.empty()) { 5582b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang vector<size_t> new_subset; 5592b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang find_subset_with_efficiency_higher_than( 5602b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang preprocessed_inputfiles, 5612b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang required_efficiency_to_beat, 5622b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang remainder, 5632b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang new_subset); 5642b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang out_partition.push_back(new_subset); 5652b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 5662b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 5672b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 5682b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangvoid print_partition( 5692b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang const vector<preprocessed_inputfile_t>& preprocessed_inputfiles, 5702b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang const vector<vector<size_t>>& partition) 5712b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 5722b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang float efficiency = efficiency_of_partition(preprocessed_inputfiles, partition); 5732b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang cout << "Partition into " << partition.size() << " subsets for " << efficiency * 100.0f << "% efficiency" << endl; 5742b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang for (auto subset = partition.begin(); subset != partition.end(); ++subset) { 5752b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang cout << " Subset " << (subset - partition.begin()) 
5762b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang << ", efficiency " << efficiency_of_subset(preprocessed_inputfiles, *subset) * 100.0f << "%:" 5772b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang << endl; 5782b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang for (auto file = subset->begin(); file != subset->end(); ++file) { 5792b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang cout << " " << preprocessed_inputfiles[*file].filename << endl; 5802b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 5812b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if (dump_tables) { 5822b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang cout << " Table:" << endl; 5832b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang dump_table_for_subset(preprocessed_inputfiles, *subset); 5842b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 5852b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 5862b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang cout << endl; 5872b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 5882b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 5892b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstruct action_t 5902b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 5912b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang virtual const char* invokation_name() const { abort(); return nullptr; } 5922b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang virtual void run(const vector<string>&) const { abort(); } 5932b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang virtual ~action_t() {} 5942b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang}; 5952b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 5962b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstruct partition_action_t : action_t 5972b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 5982b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang virtual const char* invokation_name() const override { return "partition"; } 5992b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang virtual void run(const vector<string>& input_filenames) const override 
6002b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang { 6012b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang vector<preprocessed_inputfile_t> preprocessed_inputfiles; 6022b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 6032b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if (input_filenames.empty()) { 6042b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang cerr << "The " << invokation_name() << " action needs a list of input files." << endl; 6052b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang exit(1); 6062b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 6072b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 6082b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang for (auto it = input_filenames.begin(); it != input_filenames.end(); ++it) { 6092b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang inputfile_t inputfile(*it); 6102b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang switch (inputfile.type) { 6112b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang case inputfile_t::type_t::all_pot_sizes: 6122b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang preprocessed_inputfiles.emplace_back(inputfile); 6132b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang break; 6142b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang case inputfile_t::type_t::default_sizes: 6152b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang cerr << "The " << invokation_name() << " action only uses measurements for all pot sizes, and " 6162b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang << "has no use for " << *it << " which contains measurements for default sizes." 
<< endl; 6172b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang exit(1); 6182b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang break; 6192b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang default: 6202b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang cerr << "Unrecognized input file: " << *it << endl; 6212b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang exit(1); 6222b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 6232b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 6242b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 6252b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang check_all_files_in_same_exact_order(preprocessed_inputfiles); 6262b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 6272b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang float required_efficiency_to_beat = 0.0f; 6282b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang vector<vector<vector<size_t>>> partitions; 6292b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang cerr << "searching for partitions...\r" << flush; 6302b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang while (true) 6312b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang { 6322b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang vector<vector<size_t>> partition; 6332b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang find_partition_with_efficiency_higher_than( 6342b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang preprocessed_inputfiles, 6352b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang required_efficiency_to_beat, 6362b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang partition); 6372b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang float actual_efficiency = efficiency_of_partition(preprocessed_inputfiles, partition); 6382b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang cerr << "partition " << preprocessed_inputfiles.size() << " files into " << partition.size() 6392b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang << " subsets for " << 100.0f * actual_efficiency 6402b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang << " % efficiency" 
6412b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang << " \r" << flush; 6422b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang partitions.push_back(partition); 6432b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if (partition.size() == preprocessed_inputfiles.size() || actual_efficiency == 1.0f) { 6442b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang break; 6452b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 6462b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang required_efficiency_to_beat = actual_efficiency; 6472b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 6482b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang cerr << " " << endl; 6492b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang while (true) { 6502b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang bool repeat = false; 6512b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang for (size_t i = 0; i < partitions.size() - 1; i++) { 6522b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if (partitions[i].size() >= partitions[i+1].size()) { 6532b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang partitions.erase(partitions.begin() + i); 6542b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang repeat = true; 6552b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang break; 6562b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 6572b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 6582b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if (!repeat) { 6592b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang break; 6602b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 6612b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 6622b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang for (auto it = partitions.begin(); it != partitions.end(); ++it) { 6632b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang print_partition(preprocessed_inputfiles, *it); 6642b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 6652b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 6662b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang}; 
6672b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 6682b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangstruct evaluate_defaults_action_t : action_t 6692b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 6702b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang struct results_entry_t { 6712b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang uint16_t product_size; 6722b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang size_triple_t default_block_size; 6732b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang uint16_t best_pot_block_size; 6742b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang float default_gflops; 6752b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang float best_pot_gflops; 6762b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang float default_efficiency; 6772b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang }; 6782b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang friend ostream& operator<<(ostream& s, const results_entry_t& entry) 6792b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang { 6802b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return s 6812b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang << "Product size " << size_triple_t(entry.product_size) 6822b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang << ": default block size " << entry.default_block_size 6832b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang << " -> " << entry.default_gflops 6842b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang << " GFlop/s = " << entry.default_efficiency * 100.0f << " %" 6852b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang << " of best POT block size " << size_triple_t(entry.best_pot_block_size) 6862b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang << " -> " << entry.best_pot_gflops 6872b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang << " GFlop/s" << dec; 6882b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 6892b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang static bool lower_efficiency(const results_entry_t& e1, const results_entry_t& e2) { 
6902b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang return e1.default_efficiency < e2.default_efficiency; 6912b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 6922b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang virtual const char* invokation_name() const override { return "evaluate-defaults"; } 6932b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang void show_usage_and_exit() const 6942b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang { 6952b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang cerr << "usage: " << invokation_name() << " default-sizes-data all-pot-sizes-data" << endl; 6962b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang cerr << "checks how well the performance with default sizes compares to the best " 6972b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang << "performance measured over all POT sizes." << endl; 6982b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang exit(1); 6992b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 7002b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang virtual void run(const vector<string>& input_filenames) const override 7012b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang { 7022b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if (input_filenames.size() != 2) { 7032b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang show_usage_and_exit(); 7042b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 7052b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang inputfile_t inputfile_default_sizes(input_filenames[0]); 7062b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang inputfile_t inputfile_all_pot_sizes(input_filenames[1]); 7072b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if (inputfile_default_sizes.type != inputfile_t::type_t::default_sizes) { 7082b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang cerr << inputfile_default_sizes.filename << " is not an input file with default sizes." 
<< endl; 7092b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang show_usage_and_exit(); 7102b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 7112b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if (inputfile_all_pot_sizes.type != inputfile_t::type_t::all_pot_sizes) { 7122b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang cerr << inputfile_all_pot_sizes.filename << " is not an input file with all POT sizes." << endl; 7132b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang show_usage_and_exit(); 7142b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 7152b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang vector<results_entry_t> results; 7162b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang vector<results_entry_t> cubic_results; 7172b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 7182b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang uint16_t product_size = 0; 7192b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang auto it_all_pot_sizes = inputfile_all_pot_sizes.entries.begin(); 7202b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang for (auto it_default_sizes = inputfile_default_sizes.entries.begin(); 7212b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang it_default_sizes != inputfile_default_sizes.entries.end(); 7222b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang ++it_default_sizes) 7232b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang { 7242b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if (it_default_sizes->product_size == product_size) { 7252b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang continue; 7262b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 7272b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang product_size = it_default_sizes->product_size; 7282b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang while (it_all_pot_sizes != inputfile_all_pot_sizes.entries.end() && 7292b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang it_all_pot_sizes->product_size != product_size) 7302b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang { 7312b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 
++it_all_pot_sizes; 7322b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 7332b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if (it_all_pot_sizes == inputfile_all_pot_sizes.entries.end()) { 7342b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang break; 7352b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 7362b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang uint16_t best_pot_block_size = 0; 7372b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang float best_pot_gflops = 0; 7382b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang for (auto it = it_all_pot_sizes; 7392b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang it != inputfile_all_pot_sizes.entries.end() && it->product_size == product_size; 7402b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang ++it) 7412b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang { 7422b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if (it->gflops > best_pot_gflops) { 7432b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang best_pot_gflops = it->gflops; 7442b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang best_pot_block_size = it->pot_block_size; 7452b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 7462b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 7472b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang results_entry_t entry; 7482b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang entry.product_size = product_size; 7492b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang entry.default_block_size = it_default_sizes->nonpot_block_size; 7502b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang entry.best_pot_block_size = best_pot_block_size; 7512b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang entry.default_gflops = it_default_sizes->gflops; 7522b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang entry.best_pot_gflops = best_pot_gflops; 7532b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang entry.default_efficiency = entry.default_gflops / entry.best_pot_gflops; 7542b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang results.push_back(entry); 
7552b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 7562b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang size_triple_t t(product_size); 7572b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if (t.k == t.m && t.m == t.n) { 7582b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang cubic_results.push_back(entry); 7592b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 7602b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 7612b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 7622b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang cout << "All results:" << endl; 7632b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang for (auto it = results.begin(); it != results.end(); ++it) { 7642b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang cout << *it << endl; 7652b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 7662b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang cout << endl; 7672b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 7682b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang sort(results.begin(), results.end(), lower_efficiency); 7692b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 7702b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang const size_t n = min<size_t>(20, results.size()); 7712b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang cout << n << " worst results:" << endl; 7722b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang for (size_t i = 0; i < n; i++) { 7732b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang cout << results[i] << endl; 7742b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 7752b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang cout << endl; 7762b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 7772b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang cout << "cubic results:" << endl; 7782b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang for (auto it = cubic_results.begin(); it != cubic_results.end(); ++it) { 7792b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang cout << *it << endl; 7802b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 
7812b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang cout << endl; 7822b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 7832b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang sort(cubic_results.begin(), cubic_results.end(), lower_efficiency); 7842b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 7852b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang cout.precision(2); 7862b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang vector<float> a = {0.5f, 0.20f, 0.10f, 0.05f, 0.02f, 0.01f}; 7872b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang for (auto it = a.begin(); it != a.end(); ++it) { 7882b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang size_t n = min(results.size() - 1, size_t(*it * results.size())); 7892b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang cout << (100.0f * n / (results.size() - 1)) 7902b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang << " % of product sizes have default efficiency <= " 7912b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang << 100.0f * results[n].default_efficiency << " %" << endl; 7922b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 7932b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang cout.precision(default_precision); 7942b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 7952b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang}; 7962b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 7972b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 7982b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangvoid show_usage_and_exit(int argc, char* argv[], 7992b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang const vector<unique_ptr<action_t>>& available_actions) 8002b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 8012b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang cerr << "usage: " << argv[0] << " <action> [options...] 
<input files...>" << endl; 8022b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang cerr << "available actions:" << endl; 8032b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang for (auto it = available_actions.begin(); it != available_actions.end(); ++it) { 8042b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang cerr << " " << (*it)->invokation_name() << endl; 8052b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 8062b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang cerr << "the input files should each contain an output of benchmark-blocking-sizes" << endl; 8072b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang exit(1); 8082b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 8092b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 8102b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wangint main(int argc, char* argv[]) 8112b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang{ 8122b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang cout.precision(default_precision); 8132b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang cerr.precision(default_precision); 8142b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 8152b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang vector<unique_ptr<action_t>> available_actions; 8162b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang available_actions.emplace_back(new partition_action_t); 8172b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang available_actions.emplace_back(new evaluate_defaults_action_t); 8182b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 8192b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang vector<string> input_filenames; 8202b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 8212b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang action_t* action = nullptr; 8222b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 8232b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if (argc < 2) { 8242b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang show_usage_and_exit(argc, argv, available_actions); 8252b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 
8262b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang for (int i = 1; i < argc; i++) { 8272b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang bool arg_handled = false; 8282b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang // Step 1. Try to match action invokation names. 8292b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang for (auto it = available_actions.begin(); it != available_actions.end(); ++it) { 8302b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if (!strcmp(argv[i], (*it)->invokation_name())) { 8312b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if (!action) { 8322b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang action = it->get(); 8332b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang arg_handled = true; 8342b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang break; 8352b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } else { 8362b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang cerr << "can't specify more than one action!" << endl; 8372b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang show_usage_and_exit(argc, argv, available_actions); 8382b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 8392b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 8402b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 8412b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if (arg_handled) { 8422b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang continue; 8432b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 8442b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang // Step 2. Try to match option names. 
8452b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if (argv[i][0] == '-') { 8462b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if (!strcmp(argv[i], "--only-cubic-sizes")) { 8472b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang only_cubic_sizes = true; 8482b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang arg_handled = true; 8492b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 8502b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if (!strcmp(argv[i], "--dump-tables")) { 8512b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang dump_tables = true; 8522b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang arg_handled = true; 8532b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 8542b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if (!arg_handled) { 8552b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang cerr << "Unrecognized option: " << argv[i] << endl; 8562b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang show_usage_and_exit(argc, argv, available_actions); 8572b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 8582b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 8592b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if (arg_handled) { 8602b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang continue; 8612b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 8622b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang // Step 3. Default to interpreting args as input filenames. 8632b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang input_filenames.emplace_back(argv[i]); 8642b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 8652b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 8662b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if (dump_tables && only_cubic_sizes) { 8672b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang cerr << "Incompatible options: --only-cubic-sizes and --dump-tables." 
<< endl; 8682b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang show_usage_and_exit(argc, argv, available_actions); 8692b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 8702b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 8712b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang if (!action) { 8722b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang show_usage_and_exit(argc, argv, available_actions); 8732b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang } 8742b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang 8752b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang action->run(input_filenames); 8762b8756b6f1de65d3f8bffab45be6c44ceb7411fcMiao Wang} 877