123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837 |
- #if !defined(GEOGRAPHICLIB_NEARESTNEIGHBOR_HPP)
- #define GEOGRAPHICLIB_NEARESTNEIGHBOR_HPP 1
- #include <algorithm>
- #include <vector>
- #include <queue>
- #include <utility>
- #include <cstring>
- #include <limits>
- #include <cmath>
- #include <iostream>
- #include <sstream>
- #include <GeographicLib/Constants.hpp>
- #if defined(GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION) && \
- GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION
- #include <boost/serialization/nvp.hpp>
- #include <boost/serialization/split_member.hpp>
- #include <boost/serialization/array.hpp>
- #include <boost/serialization/vector.hpp>
- #endif
- #if defined(_MSC_VER)
- # pragma warning (push)
- # pragma warning (disable: 4127)
- #endif
- namespace GeographicLib {
-
- template <typename dist_t, typename pos_t, class distfun_t>
- class NearestNeighbor {
-
- static const int version = 1;
-
-
- static const int maxbucket =
- (2 + ((4 * sizeof(dist_t)) / sizeof(int) >= 2 ?
- (4 * sizeof(dist_t)) / sizeof(int) : 2));
- public:
-
- NearestNeighbor() : _numpoints(0), _bucket(0), _cost(0) {}
-
- NearestNeighbor(const std::vector<pos_t>& pts, const distfun_t& dist,
- int bucket = 4) {
- Initialize(pts, dist, bucket);
- }
-
- void Initialize(const std::vector<pos_t>& pts, const distfun_t& dist,
- int bucket = 4) {
- static_assert(std::numeric_limits<dist_t>::is_signed,
- "dist_t must be a signed type");
- if (!( 0 <= bucket && bucket <= maxbucket ))
- throw GeographicLib::GeographicErr
- ("bucket must lie in [0, 2 + 4*sizeof(dist_t)/sizeof(int)]");
- if (pts.size() > size_t(std::numeric_limits<int>::max()))
- throw GeographicLib::GeographicErr("pts array too big");
-
- std::vector<item> ids(pts.size());
- for (int k = int(ids.size()); k--;)
- ids[k] = std::make_pair(dist_t(0), k);
- int cost = 0;
- std::vector<Node> tree;
- init(pts, dist, bucket, tree, ids, cost,
- 0, int(ids.size()), int(ids.size()/2));
- _tree.swap(tree);
- _numpoints = int(pts.size());
- _bucket = bucket;
- _mc = _sc = 0;
- _cost = cost; _c1 = _k = _cmax = 0;
- _cmin = std::numeric_limits<int>::max();
- }
-
- dist_t Search(const std::vector<pos_t>& pts, const distfun_t& dist,
- const pos_t& query,
- std::vector<int>& ind,
- int k = 1,
- dist_t maxdist = std::numeric_limits<dist_t>::max(),
- dist_t mindist = -1,
- bool exhaustive = true,
- dist_t tol = 0) const {
- if (_numpoints != int(pts.size()))
- throw GeographicLib::GeographicErr("pts array has wrong size");
- std::priority_queue<item> results;
- if (_numpoints > 0 && k > 0 && maxdist > mindist) {
-
- dist_t tau = maxdist;
-
-
-
- std::priority_queue<item> todo;
- todo.push(std::make_pair(dist_t(1), int(_tree.size()) - 1));
- int c = 0;
- while (!todo.empty()) {
- int n = todo.top().second;
- dist_t d = -todo.top().first;
- todo.pop();
- dist_t tau1 = tau - tol;
-
- if (!( n >= 0 && tau1 >= d )) continue;
- const Node& current = _tree[n];
- dist_t dst = 0;
- bool exitflag = false, leaf = current.index < 0;
- for (int i = 0; i < (leaf ? _bucket : 1); ++i) {
- int index = leaf ? current.leaves[i] : current.index;
- if (index < 0) break;
- dst = dist(pts[index], query);
- ++c;
- if (dst > mindist && dst <= tau) {
- if (int(results.size()) == k) results.pop();
- results.push(std::make_pair(dst, index));
- if (int(results.size()) == k) {
- if (exhaustive)
- tau = results.top().first;
- else {
- exitflag = true;
- break;
- }
- if (tau <= tol) {
- exitflag = true;
- break;
- }
- }
- }
- }
- if (exitflag) break;
- if (current.index < 0) continue;
- tau1 = tau - tol;
- for (int l = 0; l < 2; ++l) {
- if (current.data.child[l] >= 0 &&
- dst + current.data.upper[l] >= mindist) {
- if (dst < current.data.lower[l]) {
- d = current.data.lower[l] - dst;
- if (tau1 >= d)
- todo.push(std::make_pair(-d, current.data.child[l]));
- } else if (dst > current.data.upper[l]) {
- d = dst - current.data.upper[l];
- if (tau1 >= d)
- todo.push(std::make_pair(-d, current.data.child[l]));
- } else
- todo.push(std::make_pair(dist_t(1), current.data.child[l]));
- }
- }
- }
- ++_k;
- _c1 += c;
- double omc = _mc;
- _mc += (c - omc) / _k;
- _sc += (c - omc) * (c - _mc);
- if (c > _cmax) _cmax = c;
- if (c < _cmin) _cmin = c;
- }
- dist_t d = -1;
- ind.resize(results.size());
- for (int i = int(ind.size()); i--;) {
- ind[i] = int(results.top().second);
- if (i == 0) d = results.top().first;
- results.pop();
- }
- return d;
- }
-
- int NumPoints() const { return _numpoints; }
-
- void Save(std::ostream& os, bool bin = true) const {
- int realspec = std::numeric_limits<dist_t>::digits *
- (std::numeric_limits<dist_t>::is_integer ? -1 : 1);
- if (bin) {
- char id[] = "NearestNeighbor_";
- os.write(id, 16);
- int buf[6];
- buf[0] = version;
- buf[1] = realspec;
- buf[2] = _bucket;
- buf[3] = _numpoints;
- buf[4] = int(_tree.size());
- buf[5] = _cost;
- os.write(reinterpret_cast<const char *>(buf), 6 * sizeof(int));
- for (int i = 0; i < int(_tree.size()); ++i) {
- const Node& node = _tree[i];
- os.write(reinterpret_cast<const char *>(&node.index), sizeof(int));
- if (node.index >= 0) {
- os.write(reinterpret_cast<const char *>(node.data.lower),
- 2 * sizeof(dist_t));
- os.write(reinterpret_cast<const char *>(node.data.upper),
- 2 * sizeof(dist_t));
- os.write(reinterpret_cast<const char *>(node.data.child),
- 2 * sizeof(int));
- } else {
- os.write(reinterpret_cast<const char *>(node.leaves),
- _bucket * sizeof(int));
- }
- }
- } else {
- std::stringstream ostring;
-
-
- if (!std::numeric_limits<dist_t>::is_integer) {
- static const int prec
- = int(std::ceil(std::numeric_limits<dist_t>::digits *
- std::log10(2.0) + 1));
- ostring.precision(prec);
- }
- ostring << version << " " << realspec << " " << _bucket << " "
- << _numpoints << " " << _tree.size() << " " << _cost;
- for (int i = 0; i < int(_tree.size()); ++i) {
- const Node& node = _tree[i];
- ostring << "\n" << node.index;
- if (node.index >= 0) {
- for (int l = 0; l < 2; ++l)
- ostring << " " << node.data.lower[l] << " " << node.data.upper[l]
- << " " << node.data.child[l];
- } else {
- for (int l = 0; l < _bucket; ++l)
- ostring << " " << node.leaves[l];
- }
- }
- os << ostring.str();
- }
- }
-
- void Load(std::istream& is, bool bin = true) {
- int version1, realspec, bucket, numpoints, treesize, cost;
- if (bin) {
- char id[17];
- is.read(id, 16);
- id[16] = '\0';
- if (!(std::strcmp(id, "NearestNeighbor_") == 0))
- throw GeographicLib::GeographicErr("Bad ID");
- is.read(reinterpret_cast<char *>(&version1), sizeof(int));
- is.read(reinterpret_cast<char *>(&realspec), sizeof(int));
- is.read(reinterpret_cast<char *>(&bucket), sizeof(int));
- is.read(reinterpret_cast<char *>(&numpoints), sizeof(int));
- is.read(reinterpret_cast<char *>(&treesize), sizeof(int));
- is.read(reinterpret_cast<char *>(&cost), sizeof(int));
- } else {
- if (!( is >> version1 >> realspec >> bucket >> numpoints >> treesize
- >> cost ))
- throw GeographicLib::GeographicErr("Bad header");
- }
- if (!( version1 == version ))
- throw GeographicLib::GeographicErr("Incompatible version");
- if (!( realspec == std::numeric_limits<dist_t>::digits *
- (std::numeric_limits<dist_t>::is_integer ? -1 : 1) ))
- throw GeographicLib::GeographicErr("Different dist_t types");
- if (!( 0 <= bucket && bucket <= maxbucket ))
- throw GeographicLib::GeographicErr("Bad bucket size");
- if (!( 0 <= treesize && treesize <= numpoints ))
- throw
- GeographicLib::GeographicErr("Bad number of points or tree size");
- if (!( 0 <= cost ))
- throw GeographicLib::GeographicErr("Bad value for cost");
- std::vector<Node> tree;
- tree.reserve(treesize);
- for (int i = 0; i < treesize; ++i) {
- Node node;
- if (bin) {
- is.read(reinterpret_cast<char *>(&node.index), sizeof(int));
- if (node.index >= 0) {
- is.read(reinterpret_cast<char *>(node.data.lower),
- 2 * sizeof(dist_t));
- is.read(reinterpret_cast<char *>(node.data.upper),
- 2 * sizeof(dist_t));
- is.read(reinterpret_cast<char *>(node.data.child),
- 2 * sizeof(int));
- } else {
- is.read(reinterpret_cast<char *>(node.leaves),
- bucket * sizeof(int));
- for (int l = bucket; l < maxbucket; ++l)
- node.leaves[l] = 0;
- }
- } else {
- if (!( is >> node.index ))
- throw GeographicLib::GeographicErr("Bad index");
- if (node.index >= 0) {
- for (int l = 0; l < 2; ++l) {
- if (!( is >> node.data.lower[l] >> node.data.upper[l]
- >> node.data.child[l] ))
- throw GeographicLib::GeographicErr("Bad node data");
- }
- } else {
-
-
- for (int l = 0; l < bucket; ++l) {
- if (!( is >> node.leaves[l] ))
- throw GeographicLib::GeographicErr("Bad leaf data");
- }
- for (int l = bucket; l < maxbucket; ++l)
- node.leaves[l] = 0;
- }
- }
- node.Check(numpoints, treesize, bucket);
- tree.push_back(node);
- }
- _tree.swap(tree);
- _numpoints = numpoints;
- _bucket = bucket;
- _mc = _sc = 0;
- _cost = cost; _c1 = _k = _cmax = 0;
- _cmin = std::numeric_limits<int>::max();
- }
-
- friend std::ostream& operator<<(std::ostream& os, const NearestNeighbor& t)
- { t.Save(os, false); return os; }
-
- friend std::istream& operator>>(std::istream& is, NearestNeighbor& t)
- { t.Load(is, false); return is; }
-
- void swap(NearestNeighbor& t) {
- std::swap(_numpoints, t._numpoints);
- std::swap(_bucket, t._bucket);
- std::swap(_cost, t._cost);
- _tree.swap(t._tree);
- std::swap(_mc, t._mc);
- std::swap(_sc, t._sc);
- std::swap(_c1, t._c1);
- std::swap(_k, t._k);
- std::swap(_cmin, t._cmin);
- std::swap(_cmax, t._cmax);
- }
-
- void Statistics(int& setupcost, int& numsearches, int& searchcost,
- int& mincost, int& maxcost,
- double& mean, double& sd) const {
- setupcost = _cost; numsearches = _k; searchcost = _c1;
- mincost = _cmin; maxcost = _cmax;
- mean = _mc; sd = std::sqrt(_sc / (_k - 1));
- }
-
- void ResetStatistics() const {
- _mc = _sc = 0;
- _c1 = _k = _cmax = 0;
- _cmin = std::numeric_limits<int>::max();
- }
- private:
-
-
- typedef std::pair<dist_t, int> item;
-
- class Node {
- public:
- struct bounds {
- dist_t lower[2], upper[2];
- int child[2];
- };
- union {
- bounds data;
- int leaves[maxbucket];
- };
- int index;
- Node()
- : index(-1)
- {
- for (int i = 0; i < 2; ++i) {
- data.lower[i] = data.upper[i] = 0;
- data.child[i] = -1;
- }
- }
-
- void Check(int numpoints, int treesize, int bucket) const {
- if (!( -1 <= index && index < numpoints ))
- throw GeographicLib::GeographicErr("Bad index");
- if (index >= 0) {
- if (!( -1 <= data.child[0] && data.child[0] < treesize &&
- -1 <= data.child[1] && data.child[1] < treesize ))
- throw GeographicLib::GeographicErr("Bad child pointers");
- if (!( 0 <= data.lower[0] && data.lower[0] <= data.upper[0] &&
- data.upper[0] <= data.lower[1] &&
- data.lower[1] <= data.upper[1] ))
- throw GeographicLib::GeographicErr("Bad bounds");
- } else {
-
-
- bool start = true;
- for (int l = 0; l < bucket; ++l) {
- if (!( (start ?
- ((l == 0 ? 0 : -1) <= leaves[l] && leaves[l] < numpoints) :
- leaves[l] == -1) ))
- throw GeographicLib::GeographicErr("Bad leaf data");
- start = leaves[l] >= 0;
- }
- for (int l = bucket; l < maxbucket; ++l) {
- if (leaves[l] != 0)
- throw GeographicLib::GeographicErr("Bad leaf data");
- }
- }
- }
- #if defined(GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION) && \
- GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION
- friend class boost::serialization::access;
- template<class Archive>
- void save(Archive& ar, const unsigned int) const {
- ar & boost::serialization::make_nvp("index", index);
- if (index < 0)
- ar & boost::serialization::make_nvp("leaves", leaves);
- else
- ar & boost::serialization::make_nvp("lower", data.lower)
- & boost::serialization::make_nvp("upper", data.upper)
- & boost::serialization::make_nvp("child", data.child);
- }
- template<class Archive>
- void load(Archive& ar, const unsigned int) {
- ar & boost::serialization::make_nvp("index", index);
- if (index < 0)
- ar & boost::serialization::make_nvp("leaves", leaves);
- else
- ar & boost::serialization::make_nvp("lower", data.lower)
- & boost::serialization::make_nvp("upper", data.upper)
- & boost::serialization::make_nvp("child", data.child);
- }
- template<class Archive>
- void serialize(Archive& ar, const unsigned int file_version)
- { boost::serialization::split_member(ar, *this, file_version); }
- #endif
- };
-
- #if defined(GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION) && \
- GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION
- friend class boost::serialization::access;
- template<class Archive> void save(Archive& ar, const unsigned) const {
- int realspec = std::numeric_limits<dist_t>::digits *
- (std::numeric_limits<dist_t>::is_integer ? -1 : 1);
-
-
- int version1 = version;
- ar & boost::serialization::make_nvp("version", version1)
- & boost::serialization::make_nvp("realspec", realspec)
- & boost::serialization::make_nvp("bucket", _bucket)
- & boost::serialization::make_nvp("numpoints", _numpoints)
- & boost::serialization::make_nvp("cost", _cost)
- & boost::serialization::make_nvp("tree", _tree);
- }
- template<class Archive> void load(Archive& ar, const unsigned) {
- int version1, realspec, bucket, numpoints, cost;
- ar & boost::serialization::make_nvp("version", version1);
- if (version1 != version)
- throw GeographicLib::GeographicErr("Incompatible version");
- std::vector<Node> tree;
- ar & boost::serialization::make_nvp("realspec", realspec);
- if (!( realspec == std::numeric_limits<dist_t>::digits *
- (std::numeric_limits<dist_t>::is_integer ? -1 : 1) ))
- throw GeographicLib::GeographicErr("Different dist_t types");
- ar & boost::serialization::make_nvp("bucket", bucket);
- if (!( 0 <= bucket && bucket <= maxbucket ))
- throw GeographicLib::GeographicErr("Bad bucket size");
- ar & boost::serialization::make_nvp("numpoints", numpoints)
- & boost::serialization::make_nvp("cost", cost)
- & boost::serialization::make_nvp("tree", tree);
- if (!( 0 <= int(tree.size()) && int(tree.size()) <= numpoints ))
- throw
- GeographicLib::GeographicErr("Bad number of points or tree size");
- for (int i = 0; i < int(tree.size()); ++i)
- tree[i].Check(numpoints, int(tree.size()), bucket);
- _tree.swap(tree);
- _numpoints = numpoints;
- _bucket = bucket;
- _mc = _sc = 0;
- _cost = cost; _c1 = _k = _cmax = 0;
- _cmin = std::numeric_limits<int>::max();
- }
- template<class Archive>
- void serialize(Archive& ar, const unsigned int file_version)
- { boost::serialization::split_member(ar, *this, file_version); }
- #endif
- int _numpoints, _bucket, _cost;
- std::vector<Node> _tree;
-
- mutable double _mc, _sc;
- mutable int _c1, _k, _cmin, _cmax;
- int init(const std::vector<pos_t>& pts, const distfun_t& dist, int bucket,
- std::vector<Node>& tree, std::vector<item>& ids, int& cost,
- int l, int u, int vp) {
- if (u == l)
- return -1;
- Node node;
- if (u - l > (bucket == 0 ? 1 : bucket)) {
-
- int i = vp;
- std::swap(ids[l], ids[i]);
- int m = (u + l + 1) / 2;
- for (int k = l + 1; k < u; ++k) {
- ids[k].first = dist(pts[ids[l].second], pts[ids[k].second]);
- ++cost;
- }
-
- std::nth_element(ids.begin() + l + 1,
- ids.begin() + m,
- ids.begin() + u);
- node.index = ids[l].second;
- if (m > l + 1) {
- typename std::vector<item>::iterator
- t = std::min_element(ids.begin() + l + 1, ids.begin() + m);
- node.data.lower[0] = t->first;
- t = std::max_element(ids.begin() + l + 1, ids.begin() + m);
- node.data.upper[0] = t->first;
-
-
- node.data.child[0] = init(pts, dist, bucket, tree, ids, cost,
- l + 1, m, int(t - ids.begin()));
- }
- typename std::vector<item>::iterator
- t = std::max_element(ids.begin() + m, ids.begin() + u);
- node.data.lower[1] = ids[m].first;
- node.data.upper[1] = t->first;
-
- node.data.child[1] = init(pts, dist, bucket, tree, ids, cost,
- m, u, int(t - ids.begin()));
- } else {
- if (bucket == 0)
- node.index = ids[l].second;
- else {
- node.index = -1;
-
-
- std::sort(ids.begin() + l, ids.begin() + u);
- for (int i = l; i < u; ++i)
- node.leaves[i-l] = ids[i].second;
- for (int i = u - l; i < bucket; ++i)
- node.leaves[i] = -1;
- for (int i = bucket; i < maxbucket; ++i)
- node.leaves[i] = 0;
- }
- }
- tree.push_back(node);
- return int(tree.size()) - 1;
- }
- };
- }
- namespace std {
-
- template <typename dist_t, typename pos_t, class distfun_t>
- void swap(GeographicLib::NearestNeighbor<dist_t, pos_t, distfun_t>& a,
- GeographicLib::NearestNeighbor<dist_t, pos_t, distfun_t>& b) {
- a.swap(b);
- }
- }
- #if defined(_MSC_VER)
- # pragma warning (pop)
- #endif
- #endif
|