NearestNeighbor.hpp 35 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837
  1. /**
  2. * \file NearestNeighbor.hpp
  3. * \brief Header for GeographicLib::NearestNeighbor class
  4. *
  5. * Copyright (c) Charles Karney (2016-2020) <charles@karney.com> and licensed
  6. * under the MIT/X11 License. For more information, see
  7. * https://geographiclib.sourceforge.io/
  8. **********************************************************************/
  9. #if !defined(GEOGRAPHICLIB_NEARESTNEIGHBOR_HPP)
  10. #define GEOGRAPHICLIB_NEARESTNEIGHBOR_HPP 1
  11. #include <algorithm> // for nth_element, max_element, etc.
  12. #include <vector>
  13. #include <queue> // for priority_queue
  14. #include <utility> // for swap + pair
  15. #include <cstring>
  16. #include <limits>
  17. #include <cmath>
  18. #include <iostream>
  19. #include <sstream>
  20. // Only for GeographicLib::GeographicErr
  21. #include <GeographicLib/Constants.hpp>
  22. #if defined(GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION) && \
  23. GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION
  24. #include <boost/serialization/nvp.hpp>
  25. #include <boost/serialization/split_member.hpp>
  26. #include <boost/serialization/array.hpp>
  27. #include <boost/serialization/vector.hpp>
  28. #endif
  29. #if defined(_MSC_VER)
  30. // Squelch warnings about constant conditional expressions
  31. # pragma warning (push)
  32. # pragma warning (disable: 4127)
  33. #endif
  34. namespace GeographicLib {
  35. /**
  36. * \brief Nearest-neighbor calculations
  37. *
  38. * This class solves the nearest-neighbor problm using a vantage-point tree
  39. * as described in \ref nearest.
  40. *
  41. * This class is templated so that it can handle arbitrary metric spaces as
  42. * follows:
  43. *
  44. * @tparam dist_t the type used for measuring distances; it can be a real or
  45. * signed integer type; in typical geodetic applications, \e dist_t might
  46. * be <code>double</code>.
  47. * @tparam pos_t the type for specifying the positions of points; geodetic
  48. * application might bundled the latitude and longitude into a
  49. * <code>std::pair<dist_t, dist_t></code>.
  50. * @tparam distfun_t the type of a function object which takes takes two
  51. * positions (of type \e pos_t) and returns the distance (of type \e
  52. * dist_t); in geodetic applications, this might be a class which is
  53. * constructed with a Geodesic object and which implements a member
  54. * function with a signature <code>dist_t operator() (const pos_t&, const
  55. * pos_t&) const</code>, which returns the geodesic distance between two
  56. * points.
  57. *
  58. * \note The distance measure must satisfy the triangle inequality, \f$
  59. * d(a,c) \le d(a,b) + d(b,c) \f$ for all points \e a, \e b, \e c. The
  60. * geodesic distance (given by Geodesic::Inverse) does, while the great
  61. * ellipse distance and the rhumb line distance <i>do not</i>. If you use
  62. * the ordinary Euclidean distance, i.e., \f$ \sqrt{(x_a-x_b)^2 +
  63. * (y_a-y_b)^2} \f$ for two dimensions, don't be tempted to leave out the
  64. * square root in the interests of "efficiency"; the squared distance does
  65. * not satisfy the triangle inequality!
  66. *
  67. * \note This is a "header-only" implementation and, as such, depends in a
  68. * minimal way on the rest of GeographicLib (the only dependency is through
  69. * the use of GeographicLib::GeographicErr for handling compile-time and
  70. * run-time exceptions). Therefore, it is easy to extract this class from
  71. * the rest of GeographicLib and use it as a stand-alone facility.
  72. *
  73. * The \e dist_t type must support numeric_limits queries (specifically:
  74. * is_signed, is_integer, max(), digits).
  75. *
  76. * The NearestNeighbor object is constructed with a vector of points (type \e
  77. * pos_t) and a distance function (type \e distfun_t). However the object
  78. * does \e not store the points. When querying the object with Search(),
  79. * it's necessary to supply the same vector of points and the same distance
  80. * function.
  81. *
  82. * There's no capability in this implementation to add or remove points from
  83. * the set. Instead Initialize() should be called to re-initialize the
  84. * object with the modified vector of points.
  85. *
  86. * Because of the overhead in constructing a NearestNeighbor object for a
  87. * large set of points, functions Save() and Load() are provided to save the
  88. * object to an external file. operator<<(), operator>>() and <a
  89. * href="https://www.boost.org/libs/serialization/doc"> Boost
  90. * serialization</a> can also be used to save and restore a NearestNeighbor
  91. * object. This is illustrated in the example.
  92. *
  93. * Example of use:
  94. * \include example-NearestNeighbor.cpp
  95. **********************************************************************/
  96. template <typename dist_t, typename pos_t, class distfun_t>
  97. class NearestNeighbor {
  98. // For tracking changes to the I/O format
  99. static const int version = 1;
  100. // This is what we get "free"; but if sizeof(dist_t) = 1 (unlikely), allow
  101. // 4 slots (and this accommodates the default value bucket = 4).
  102. static const int maxbucket =
  103. (2 + ((4 * sizeof(dist_t)) / sizeof(int) >= 2 ?
  104. (4 * sizeof(dist_t)) / sizeof(int) : 2));
  105. public:
  106. /**
  107. * Default constructor for NearestNeighbor.
  108. *
  109. * This is equivalent to specifying an empty set of points.
  110. **********************************************************************/
  111. NearestNeighbor() : _numpoints(0), _bucket(0), _cost(0) {}
  112. /**
  113. * Constructor for NearestNeighbor.
  114. *
  115. * @param[in] pts a vector of points to include in the set.
  116. * @param[in] dist the distance function object.
  117. * @param[in] bucket the size of the buckets at the leaf nodes; this must
  118. * lie in [0, 2 + 4*sizeof(dist_t)/sizeof(int)] (default 4).
  119. * @exception GeographicErr if the value of \e bucket is out of bounds or
  120. * the size of \e pts is too big for an int.
  121. * @exception std::bad_alloc if memory for the tree can't be allocated.
  122. *
  123. * \e pts may contain coincident points (i.e., the distance between them
  124. * vanishes); these are treated as distinct.
  125. *
  126. * The choice of \e bucket is a tradeoff between space and efficiency. A
  127. * larger \e bucket decreases the size of the NearestNeighbor object which
  128. * scales as pts.size() / max(1, bucket) and reduces the number of distance
  129. * calculations to construct the object by log2(bucket) * pts.size().
  130. * However each search then requires about bucket additional distance
  131. * calculations.
  132. *
  133. * \warning The distances computed by \e dist must satisfy the standard
  134. * metric conditions. If not, the results are undefined. Neither the data
  135. * in \e pts nor the query points should contain NaNs or infinities because
  136. * such data violates the metric conditions.
  137. *
  138. * \warning The same arguments \e pts and \e dist must be provided
  139. * to the Search() function.
  140. **********************************************************************/
  141. NearestNeighbor(const std::vector<pos_t>& pts, const distfun_t& dist,
  142. int bucket = 4) {
  143. Initialize(pts, dist, bucket);
  144. }
  145. /**
  146. * Initialize or re-initialize NearestNeighbor.
  147. *
  148. * @param[in] pts a vector of points to include in the tree.
  149. * @param[in] dist the distance function object.
  150. * @param[in] bucket the size of the buckets at the leaf nodes; this must
  151. * lie in [0, 2 + 4*sizeof(dist_t)/sizeof(int)] (default 4).
  152. * @exception GeographicErr if the value of \e bucket is out of bounds or
  153. * the size of \e pts is too big for an int.
  154. * @exception std::bad_alloc if memory for the tree can't be allocated.
  155. *
  156. * See also the documentation on the constructor.
  157. *
  158. * If an exception is thrown, the state of the NearestNeighbor is
  159. * unchanged.
  160. **********************************************************************/
  161. void Initialize(const std::vector<pos_t>& pts, const distfun_t& dist,
  162. int bucket = 4) {
  163. static_assert(std::numeric_limits<dist_t>::is_signed,
  164. "dist_t must be a signed type");
  165. if (!( 0 <= bucket && bucket <= maxbucket ))
  166. throw GeographicLib::GeographicErr
  167. ("bucket must lie in [0, 2 + 4*sizeof(dist_t)/sizeof(int)]");
  168. if (pts.size() > size_t(std::numeric_limits<int>::max()))
  169. throw GeographicLib::GeographicErr("pts array too big");
  170. // the pair contains distance+id
  171. std::vector<item> ids(pts.size());
  172. for (int k = int(ids.size()); k--;)
  173. ids[k] = std::make_pair(dist_t(0), k);
  174. int cost = 0;
  175. std::vector<Node> tree;
  176. init(pts, dist, bucket, tree, ids, cost,
  177. 0, int(ids.size()), int(ids.size()/2));
  178. _tree.swap(tree);
  179. _numpoints = int(pts.size());
  180. _bucket = bucket;
  181. _mc = _sc = 0;
  182. _cost = cost; _c1 = _k = _cmax = 0;
  183. _cmin = std::numeric_limits<int>::max();
  184. }
  185. /**
  186. * Search the NearestNeighbor.
  187. *
  188. * @param[in] pts the vector of points used for initialization.
  189. * @param[in] dist the distance function object used for initialization.
  190. * @param[in] query the query point.
  191. * @param[out] ind a vector of indices to the closest points found.
  192. * @param[in] k the number of points to search for (default = 1).
  193. * @param[in] maxdist only return points with distances of \e maxdist or
  194. * less from \e query (default is the maximum \e dist_t).
  195. * @param[in] mindist only return points with distances of more than
  196. * \e mindist from \e query (default = &minus;1).
  197. * @param[in] exhaustive whether to do an exhaustive search (default true).
  198. * @param[in] tol the tolerance on the results (default 0).
  199. * @return the distance to the closest point found (&minus;1 if no points
  200. * are found).
  201. * @exception GeographicErr if \e pts has a different size from that used
  202. * to construct the object.
  203. *
  204. * The indices returned in \e ind are sorted by distance from \e query
  205. * (closest first).
  206. *
  207. * The simplest invocation is with just the 4 non-optional arguments. This
  208. * returns the closest distance and the index to the closest point in
  209. * <i>ind</i><sub>0</sub>. If there are several points equally close, then
  210. * <i>ind</i><sub>0</sub> gives the index of an arbirary one of them. If
  211. * there's no closest point (because the set of points is empty), then \e
  212. * ind is empty and &minus;1 is returned.
  213. *
  214. * With \e exhaustive = true and \e tol = 0 (their default values), this
  215. * finds the indices of \e k closest neighbors to \e query whose distances
  216. * to \e query are in (\e mindist, \e maxdist]. If \e mindist and \e
  217. * maxdist have their default values, then these bounds have no effect. If
  218. * \e query is one of the points in the tree, then set \e mindist = 0 to
  219. * prevent this point (and other coincident points) from being returned.
  220. *
  221. * If \e exhaustive = false, exit as soon as \e k results satisfying the
  222. * distance criteria are found. If less than \e k results are returned
  223. * then the search was exhaustive even if \e exhaustive = false.
  224. *
  225. * If \e tol is positive, do an approximate search; in this case the
  226. * results are to be interpreted as follows: if the <i>k</i>'th distance is
  227. * \e dk, then all results with distances less than or equal \e dk &minus;
  228. * \e tol are correct; all others are suspect &mdash; there may be other
  229. * closer results with distances greater or equal to \e dk &minus; \e tol.
  230. * If less than \e k results are found, then the search is exact.
  231. *
  232. * \e mindist should be used to exclude a "small" neighborhood of the query
  233. * point (relative to the average spacing of the data). If \e mindist is
  234. * large, the efficiency of the search deteriorates.
  235. *
  236. * \note Only the shortest distance is returned (as as the function value).
  237. * The distances to other points (indexed by <i>ind</i><sub><i>j</i></sub>
  238. * for \e j > 0) can be found by invoking \e dist again.
  239. *
  240. * \warning The arguments \e pts and \e dist must be identical to those
  241. * used to initialize the NearestNeighbor; if not, this function will
  242. * return some meaningless result (however, if the size of \e pts is wrong,
  243. * this function throw an exception).
  244. *
  245. * \warning The query point cannot be a NaN or infinite because then the
  246. * metric conditions are violated.
  247. **********************************************************************/
  248. dist_t Search(const std::vector<pos_t>& pts, const distfun_t& dist,
  249. const pos_t& query,
  250. std::vector<int>& ind,
  251. int k = 1,
  252. dist_t maxdist = std::numeric_limits<dist_t>::max(),
  253. dist_t mindist = -1,
  254. bool exhaustive = true,
  255. dist_t tol = 0) const {
  256. if (_numpoints != int(pts.size()))
  257. throw GeographicLib::GeographicErr("pts array has wrong size");
  258. std::priority_queue<item> results;
  259. if (_numpoints > 0 && k > 0 && maxdist > mindist) {
  260. // distance to the kth closest point so far
  261. dist_t tau = maxdist;
  262. // first is negative of how far query is outside boundary of node
  263. // +1 if on boundary or inside
  264. // second is node index
  265. std::priority_queue<item> todo;
  266. todo.push(std::make_pair(dist_t(1), int(_tree.size()) - 1));
  267. int c = 0;
  268. while (!todo.empty()) {
  269. int n = todo.top().second;
  270. dist_t d = -todo.top().first;
  271. todo.pop();
  272. dist_t tau1 = tau - tol;
  273. // compare tau and d again since tau may have become smaller.
  274. if (!( n >= 0 && tau1 >= d )) continue;
  275. const Node& current = _tree[n];
  276. dist_t dst = 0; // to suppress warning about uninitialized variable
  277. bool exitflag = false, leaf = current.index < 0;
  278. for (int i = 0; i < (leaf ? _bucket : 1); ++i) {
  279. int index = leaf ? current.leaves[i] : current.index;
  280. if (index < 0) break;
  281. dst = dist(pts[index], query);
  282. ++c;
  283. if (dst > mindist && dst <= tau) {
  284. if (int(results.size()) == k) results.pop();
  285. results.push(std::make_pair(dst, index));
  286. if (int(results.size()) == k) {
  287. if (exhaustive)
  288. tau = results.top().first;
  289. else {
  290. exitflag = true;
  291. break;
  292. }
  293. if (tau <= tol) {
  294. exitflag = true;
  295. break;
  296. }
  297. }
  298. }
  299. }
  300. if (exitflag) break;
  301. if (current.index < 0) continue;
  302. tau1 = tau - tol;
  303. for (int l = 0; l < 2; ++l) {
  304. if (current.data.child[l] >= 0 &&
  305. dst + current.data.upper[l] >= mindist) {
  306. if (dst < current.data.lower[l]) {
  307. d = current.data.lower[l] - dst;
  308. if (tau1 >= d)
  309. todo.push(std::make_pair(-d, current.data.child[l]));
  310. } else if (dst > current.data.upper[l]) {
  311. d = dst - current.data.upper[l];
  312. if (tau1 >= d)
  313. todo.push(std::make_pair(-d, current.data.child[l]));
  314. } else
  315. todo.push(std::make_pair(dist_t(1), current.data.child[l]));
  316. }
  317. }
  318. }
  319. ++_k;
  320. _c1 += c;
  321. double omc = _mc;
  322. _mc += (c - omc) / _k;
  323. _sc += (c - omc) * (c - _mc);
  324. if (c > _cmax) _cmax = c;
  325. if (c < _cmin) _cmin = c;
  326. }
  327. dist_t d = -1;
  328. ind.resize(results.size());
  329. for (int i = int(ind.size()); i--;) {
  330. ind[i] = int(results.top().second);
  331. if (i == 0) d = results.top().first;
  332. results.pop();
  333. }
  334. return d;
  335. }
  336. /**
  337. * @return the total number of points in the set.
  338. **********************************************************************/
  339. int NumPoints() const { return _numpoints; }
  340. /**
  341. * Write the object to an I/O stream.
  342. *
  343. * @param[in,out] os the stream to write to.
  344. * @param[in] bin if true (the default) save in binary mode.
  345. * @exception std::bad_alloc if memory for the string representation of the
  346. * object can't be allocated.
  347. *
  348. * The counters tracking the statistics of searches are not saved; however
  349. * the initializtion cost is saved. The format of the binary saves is \e
  350. * not portable.
  351. *
  352. * \note <a href="https://www.boost.org/libs/serialization/doc">
  353. * Boost serialization</a> can also be used to save and restore a
  354. * NearestNeighbor object. This requires that the
  355. * GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION macro be defined.
  356. **********************************************************************/
  357. void Save(std::ostream& os, bool bin = true) const {
  358. int realspec = std::numeric_limits<dist_t>::digits *
  359. (std::numeric_limits<dist_t>::is_integer ? -1 : 1);
  360. if (bin) {
  361. char id[] = "NearestNeighbor_";
  362. os.write(id, 16);
  363. int buf[6];
  364. buf[0] = version;
  365. buf[1] = realspec;
  366. buf[2] = _bucket;
  367. buf[3] = _numpoints;
  368. buf[4] = int(_tree.size());
  369. buf[5] = _cost;
  370. os.write(reinterpret_cast<const char *>(buf), 6 * sizeof(int));
  371. for (int i = 0; i < int(_tree.size()); ++i) {
  372. const Node& node = _tree[i];
  373. os.write(reinterpret_cast<const char *>(&node.index), sizeof(int));
  374. if (node.index >= 0) {
  375. os.write(reinterpret_cast<const char *>(node.data.lower),
  376. 2 * sizeof(dist_t));
  377. os.write(reinterpret_cast<const char *>(node.data.upper),
  378. 2 * sizeof(dist_t));
  379. os.write(reinterpret_cast<const char *>(node.data.child),
  380. 2 * sizeof(int));
  381. } else {
  382. os.write(reinterpret_cast<const char *>(node.leaves),
  383. _bucket * sizeof(int));
  384. }
  385. }
  386. } else {
  387. std::stringstream ostring;
  388. // Ensure enough precision for type dist_t. With C++11, max_digits10
  389. // can be used instead.
  390. if (!std::numeric_limits<dist_t>::is_integer) {
  391. static const int prec
  392. = int(std::ceil(std::numeric_limits<dist_t>::digits *
  393. std::log10(2.0) + 1));
  394. ostring.precision(prec);
  395. }
  396. ostring << version << " " << realspec << " " << _bucket << " "
  397. << _numpoints << " " << _tree.size() << " " << _cost;
  398. for (int i = 0; i < int(_tree.size()); ++i) {
  399. const Node& node = _tree[i];
  400. ostring << "\n" << node.index;
  401. if (node.index >= 0) {
  402. for (int l = 0; l < 2; ++l)
  403. ostring << " " << node.data.lower[l] << " " << node.data.upper[l]
  404. << " " << node.data.child[l];
  405. } else {
  406. for (int l = 0; l < _bucket; ++l)
  407. ostring << " " << node.leaves[l];
  408. }
  409. }
  410. os << ostring.str();
  411. }
  412. }
  413. /**
  414. * Read the object from an I/O stream.
  415. *
  416. * @param[in,out] is the stream to read from
  417. * @param[in] bin if true (the default) load in binary mode.
  418. * @exception GeographicErr if the state read from \e is is illegal.
  419. * @exception std::bad_alloc if memory for the tree can't be allocated.
  420. *
  421. * The counters tracking the statistics of searches are reset by this
  422. * operation. Binary data must have been saved on a machine with the same
  423. * architecture. If an exception is thrown, the state of the
  424. * NearestNeighbor is unchanged.
  425. *
  426. * \note <a href="https://www.boost.org/libs/serialization/doc">
  427. * Boost serialization</a> can also be used to save and restore a
  428. * NearestNeighbor object. This requires that the
  429. * GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION macro be defined.
  430. *
  431. * \warning The same arguments \e pts and \e dist used for
  432. * initialization must be provided to the Search() function.
  433. **********************************************************************/
  434. void Load(std::istream& is, bool bin = true) {
  435. int version1, realspec, bucket, numpoints, treesize, cost;
  436. if (bin) {
  437. char id[17];
  438. is.read(id, 16);
  439. id[16] = '\0';
  440. if (!(std::strcmp(id, "NearestNeighbor_") == 0))
  441. throw GeographicLib::GeographicErr("Bad ID");
  442. is.read(reinterpret_cast<char *>(&version1), sizeof(int));
  443. is.read(reinterpret_cast<char *>(&realspec), sizeof(int));
  444. is.read(reinterpret_cast<char *>(&bucket), sizeof(int));
  445. is.read(reinterpret_cast<char *>(&numpoints), sizeof(int));
  446. is.read(reinterpret_cast<char *>(&treesize), sizeof(int));
  447. is.read(reinterpret_cast<char *>(&cost), sizeof(int));
  448. } else {
  449. if (!( is >> version1 >> realspec >> bucket >> numpoints >> treesize
  450. >> cost ))
  451. throw GeographicLib::GeographicErr("Bad header");
  452. }
  453. if (!( version1 == version ))
  454. throw GeographicLib::GeographicErr("Incompatible version");
  455. if (!( realspec == std::numeric_limits<dist_t>::digits *
  456. (std::numeric_limits<dist_t>::is_integer ? -1 : 1) ))
  457. throw GeographicLib::GeographicErr("Different dist_t types");
  458. if (!( 0 <= bucket && bucket <= maxbucket ))
  459. throw GeographicLib::GeographicErr("Bad bucket size");
  460. if (!( 0 <= treesize && treesize <= numpoints ))
  461. throw
  462. GeographicLib::GeographicErr("Bad number of points or tree size");
  463. if (!( 0 <= cost ))
  464. throw GeographicLib::GeographicErr("Bad value for cost");
  465. std::vector<Node> tree;
  466. tree.reserve(treesize);
  467. for (int i = 0; i < treesize; ++i) {
  468. Node node;
  469. if (bin) {
  470. is.read(reinterpret_cast<char *>(&node.index), sizeof(int));
  471. if (node.index >= 0) {
  472. is.read(reinterpret_cast<char *>(node.data.lower),
  473. 2 * sizeof(dist_t));
  474. is.read(reinterpret_cast<char *>(node.data.upper),
  475. 2 * sizeof(dist_t));
  476. is.read(reinterpret_cast<char *>(node.data.child),
  477. 2 * sizeof(int));
  478. } else {
  479. is.read(reinterpret_cast<char *>(node.leaves),
  480. bucket * sizeof(int));
  481. for (int l = bucket; l < maxbucket; ++l)
  482. node.leaves[l] = 0;
  483. }
  484. } else {
  485. if (!( is >> node.index ))
  486. throw GeographicLib::GeographicErr("Bad index");
  487. if (node.index >= 0) {
  488. for (int l = 0; l < 2; ++l) {
  489. if (!( is >> node.data.lower[l] >> node.data.upper[l]
  490. >> node.data.child[l] ))
  491. throw GeographicLib::GeographicErr("Bad node data");
  492. }
  493. } else {
  494. // Must be at least one valid leaf followed by a sequence end
  495. // markers (-1).
  496. for (int l = 0; l < bucket; ++l) {
  497. if (!( is >> node.leaves[l] ))
  498. throw GeographicLib::GeographicErr("Bad leaf data");
  499. }
  500. for (int l = bucket; l < maxbucket; ++l)
  501. node.leaves[l] = 0;
  502. }
  503. }
  504. node.Check(numpoints, treesize, bucket);
  505. tree.push_back(node);
  506. }
  507. _tree.swap(tree);
  508. _numpoints = numpoints;
  509. _bucket = bucket;
  510. _mc = _sc = 0;
  511. _cost = cost; _c1 = _k = _cmax = 0;
  512. _cmin = std::numeric_limits<int>::max();
  513. }
  514. /**
  515. * Write the object to stream \e os as text.
  516. *
  517. * @param[in,out] os the output stream.
  518. * @param[in] t the NearestNeighbor object to be saved.
  519. * @exception std::bad_alloc if memory for the string representation of the
  520. * object can't be allocated.
  521. **********************************************************************/
  522. friend std::ostream& operator<<(std::ostream& os, const NearestNeighbor& t)
  523. { t.Save(os, false); return os; }
  524. /**
  525. * Read the object from stream \e is as text.
  526. *
  527. * @param[in,out] is the input stream.
  528. * @param[out] t the NearestNeighbor object to be loaded.
  529. * @exception GeographicErr if the state read from \e is is illegal.
  530. * @exception std::bad_alloc if memory for the tree can't be allocated.
  531. **********************************************************************/
  532. friend std::istream& operator>>(std::istream& is, NearestNeighbor& t)
  533. { t.Load(is, false); return is; }
  534. /**
  535. * Swap with another NearestNeighbor object.
  536. *
  537. * @param[in,out] t the NearestNeighbor object to swap with.
  538. **********************************************************************/
  539. void swap(NearestNeighbor& t) {
  540. std::swap(_numpoints, t._numpoints);
  541. std::swap(_bucket, t._bucket);
  542. std::swap(_cost, t._cost);
  543. _tree.swap(t._tree);
  544. std::swap(_mc, t._mc);
  545. std::swap(_sc, t._sc);
  546. std::swap(_c1, t._c1);
  547. std::swap(_k, t._k);
  548. std::swap(_cmin, t._cmin);
  549. std::swap(_cmax, t._cmax);
  550. }
  551. /**
  552. * The accumulated statistics on the searches so far.
  553. *
  554. * @param[out] setupcost the cost of initializing the NearestNeighbor.
  555. * @param[out] numsearches the number of calls to Search().
  556. * @param[out] searchcost the total cost of the calls to Search().
  557. * @param[out] mincost the minimum cost of a Search().
  558. * @param[out] maxcost the maximum cost of a Search().
  559. * @param[out] mean the mean cost of a Search().
  560. * @param[out] sd the standard deviation in the cost of a Search().
  561. *
  562. * Here "cost" measures the number of distance calculations needed. Note
  563. * that the accumulation of statistics is \e not thread safe.
  564. **********************************************************************/
  565. void Statistics(int& setupcost, int& numsearches, int& searchcost,
  566. int& mincost, int& maxcost,
  567. double& mean, double& sd) const {
  568. setupcost = _cost; numsearches = _k; searchcost = _c1;
  569. mincost = _cmin; maxcost = _cmax;
  570. mean = _mc; sd = std::sqrt(_sc / (_k - 1));
  571. }
  572. /**
  573. * Reset the counters for the accumulated statistics on the searches so
  574. * far.
  575. **********************************************************************/
  576. void ResetStatistics() const {
  577. _mc = _sc = 0;
  578. _c1 = _k = _cmax = 0;
  579. _cmin = std::numeric_limits<int>::max();
  580. }
  581. private:
  582. // Package up a dist_t and an int. We will want to sort on the dist_t so
  583. // put it first.
  584. typedef std::pair<dist_t, int> item;
  585. // \cond SKIP
  586. class Node {
  587. public:
  588. struct bounds {
  589. dist_t lower[2], upper[2]; // bounds on inner/outer distances
  590. int child[2];
  591. };
  592. union {
  593. bounds data;
  594. int leaves[maxbucket];
  595. };
  596. int index;
  597. Node()
  598. : index(-1)
  599. {
  600. for (int i = 0; i < 2; ++i) {
  601. data.lower[i] = data.upper[i] = 0;
  602. data.child[i] = -1;
  603. }
  604. }
  605. // Sanity check on a Node
  606. void Check(int numpoints, int treesize, int bucket) const {
  607. if (!( -1 <= index && index < numpoints ))
  608. throw GeographicLib::GeographicErr("Bad index");
  609. if (index >= 0) {
  610. if (!( -1 <= data.child[0] && data.child[0] < treesize &&
  611. -1 <= data.child[1] && data.child[1] < treesize ))
  612. throw GeographicLib::GeographicErr("Bad child pointers");
  613. if (!( 0 <= data.lower[0] && data.lower[0] <= data.upper[0] &&
  614. data.upper[0] <= data.lower[1] &&
  615. data.lower[1] <= data.upper[1] ))
  616. throw GeographicLib::GeographicErr("Bad bounds");
  617. } else {
  618. // Must be at least one valid leaf followed by a sequence end markers
  619. // (-1).
  620. bool start = true;
  621. for (int l = 0; l < bucket; ++l) {
  622. if (!( (start ?
  623. ((l == 0 ? 0 : -1) <= leaves[l] && leaves[l] < numpoints) :
  624. leaves[l] == -1) ))
  625. throw GeographicLib::GeographicErr("Bad leaf data");
  626. start = leaves[l] >= 0;
  627. }
  628. for (int l = bucket; l < maxbucket; ++l) {
  629. if (leaves[l] != 0)
  630. throw GeographicLib::GeographicErr("Bad leaf data");
  631. }
  632. }
  633. }
  634. #if defined(GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION) && \
  635. GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION
  636. friend class boost::serialization::access;
  637. template<class Archive>
  638. void save(Archive& ar, const unsigned int) const {
  639. ar & boost::serialization::make_nvp("index", index);
  640. if (index < 0)
  641. ar & boost::serialization::make_nvp("leaves", leaves);
  642. else
  643. ar & boost::serialization::make_nvp("lower", data.lower)
  644. & boost::serialization::make_nvp("upper", data.upper)
  645. & boost::serialization::make_nvp("child", data.child);
  646. }
  647. template<class Archive>
  648. void load(Archive& ar, const unsigned int) {
  649. ar & boost::serialization::make_nvp("index", index);
  650. if (index < 0)
  651. ar & boost::serialization::make_nvp("leaves", leaves);
  652. else
  653. ar & boost::serialization::make_nvp("lower", data.lower)
  654. & boost::serialization::make_nvp("upper", data.upper)
  655. & boost::serialization::make_nvp("child", data.child);
  656. }
  657. template<class Archive>
  658. void serialize(Archive& ar, const unsigned int file_version)
  659. { boost::serialization::split_member(ar, *this, file_version); }
  660. #endif
  661. };
  662. // \endcond
  663. #if defined(GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION) && \
  664. GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION
  665. friend class boost::serialization::access;
  666. template<class Archive> void save(Archive& ar, const unsigned) const {
  667. int realspec = std::numeric_limits<dist_t>::digits *
  668. (std::numeric_limits<dist_t>::is_integer ? -1 : 1);
  669. // Need to use version1, otherwise load error in debug mode on Linux:
  670. // undefined reference to GeographicLib::NearestNeighbor<...>::version.
  671. int version1 = version;
  672. ar & boost::serialization::make_nvp("version", version1)
  673. & boost::serialization::make_nvp("realspec", realspec)
  674. & boost::serialization::make_nvp("bucket", _bucket)
  675. & boost::serialization::make_nvp("numpoints", _numpoints)
  676. & boost::serialization::make_nvp("cost", _cost)
  677. & boost::serialization::make_nvp("tree", _tree);
  678. }
  679. template<class Archive> void load(Archive& ar, const unsigned) {
  680. int version1, realspec, bucket, numpoints, cost;
  681. ar & boost::serialization::make_nvp("version", version1);
  682. if (version1 != version)
  683. throw GeographicLib::GeographicErr("Incompatible version");
  684. std::vector<Node> tree;
  685. ar & boost::serialization::make_nvp("realspec", realspec);
  686. if (!( realspec == std::numeric_limits<dist_t>::digits *
  687. (std::numeric_limits<dist_t>::is_integer ? -1 : 1) ))
  688. throw GeographicLib::GeographicErr("Different dist_t types");
  689. ar & boost::serialization::make_nvp("bucket", bucket);
  690. if (!( 0 <= bucket && bucket <= maxbucket ))
  691. throw GeographicLib::GeographicErr("Bad bucket size");
  692. ar & boost::serialization::make_nvp("numpoints", numpoints)
  693. & boost::serialization::make_nvp("cost", cost)
  694. & boost::serialization::make_nvp("tree", tree);
  695. if (!( 0 <= int(tree.size()) && int(tree.size()) <= numpoints ))
  696. throw
  697. GeographicLib::GeographicErr("Bad number of points or tree size");
  698. for (int i = 0; i < int(tree.size()); ++i)
  699. tree[i].Check(numpoints, int(tree.size()), bucket);
  700. _tree.swap(tree);
  701. _numpoints = numpoints;
  702. _bucket = bucket;
  703. _mc = _sc = 0;
  704. _cost = cost; _c1 = _k = _cmax = 0;
  705. _cmin = std::numeric_limits<int>::max();
  706. }
  707. template<class Archive>
  708. void serialize(Archive& ar, const unsigned int file_version)
  709. { boost::serialization::split_member(ar, *this, file_version); }
  710. #endif
  711. int _numpoints, _bucket, _cost;
  712. std::vector<Node> _tree;
  713. // Counters to track stastistics on the cost of searches
  714. mutable double _mc, _sc;
  715. mutable int _c1, _k, _cmin, _cmax;
  716. int init(const std::vector<pos_t>& pts, const distfun_t& dist, int bucket,
  717. std::vector<Node>& tree, std::vector<item>& ids, int& cost,
  718. int l, int u, int vp) {
  719. if (u == l)
  720. return -1;
  721. Node node;
  722. if (u - l > (bucket == 0 ? 1 : bucket)) {
  723. // choose a vantage point and move it to the start
  724. int i = vp;
  725. std::swap(ids[l], ids[i]);
  726. int m = (u + l + 1) / 2;
  727. for (int k = l + 1; k < u; ++k) {
  728. ids[k].first = dist(pts[ids[l].second], pts[ids[k].second]);
  729. ++cost;
  730. }
  731. // partition around the median distance
  732. std::nth_element(ids.begin() + l + 1,
  733. ids.begin() + m,
  734. ids.begin() + u);
  735. node.index = ids[l].second;
  736. if (m > l + 1) { // node.child[0] is possibly empty
  737. typename std::vector<item>::iterator
  738. t = std::min_element(ids.begin() + l + 1, ids.begin() + m);
  739. node.data.lower[0] = t->first;
  740. t = std::max_element(ids.begin() + l + 1, ids.begin() + m);
  741. node.data.upper[0] = t->first;
  742. // Use point with max distance as vantage point; this point act as a
  743. // "corner" point and leads to a good partition.
  744. node.data.child[0] = init(pts, dist, bucket, tree, ids, cost,
  745. l + 1, m, int(t - ids.begin()));
  746. }
  747. typename std::vector<item>::iterator
  748. t = std::max_element(ids.begin() + m, ids.begin() + u);
  749. node.data.lower[1] = ids[m].first;
  750. node.data.upper[1] = t->first;
  751. // Use point with max distance as vantage point here too
  752. node.data.child[1] = init(pts, dist, bucket, tree, ids, cost,
  753. m, u, int(t - ids.begin()));
  754. } else {
  755. if (bucket == 0)
  756. node.index = ids[l].second;
  757. else {
  758. node.index = -1;
  759. // Sort the bucket entries so that the tree is independent of the
  760. // implementation of nth_element.
  761. std::sort(ids.begin() + l, ids.begin() + u);
  762. for (int i = l; i < u; ++i)
  763. node.leaves[i-l] = ids[i].second;
  764. for (int i = u - l; i < bucket; ++i)
  765. node.leaves[i] = -1;
  766. for (int i = bucket; i < maxbucket; ++i)
  767. node.leaves[i] = 0;
  768. }
  769. }
  770. tree.push_back(node);
  771. return int(tree.size()) - 1;
  772. }
  773. };
  774. } // namespace GeographicLib
  775. namespace std {
  776. /**
  777. * Swap two GeographicLib::NearestNeighbor objects.
  778. *
  779. * @tparam dist_t the type used for measuring distances.
  780. * @tparam pos_t the type for specifying the positions of points.
  781. * @tparam distfun_t the type for a function object which calculates
  782. * distances between points.
  783. * @param[in,out] a the first GeographicLib::NearestNeighbor to swap.
  784. * @param[in,out] b the second GeographicLib::NearestNeighbor to swap.
  785. **********************************************************************/
  786. template <typename dist_t, typename pos_t, class distfun_t>
  787. void swap(GeographicLib::NearestNeighbor<dist_t, pos_t, distfun_t>& a,
  788. GeographicLib::NearestNeighbor<dist_t, pos_t, distfun_t>& b) {
  789. a.swap(b);
  790. }
  791. } // namespace std
  792. #if defined(_MSC_VER)
  793. # pragma warning (pop)
  794. #endif
  795. #endif // GEOGRAPHICLIB_NEARESTNEIGHBOR_HPP