casacore
Loading...
Searching...
No Matches
Sort.h
Go to the documentation of this file.
1//# Sort.h: Sort objects on one or more keys
2//# Copyright (C) 1995,1996,1997,1998,1999,2000,2001
3//# Associated Universities, Inc. Washington DC, USA.
4//#
5//# This library is free software; you can redistribute it and/or modify it
6//# under the terms of the GNU Library General Public License as published by
7//# the Free Software Foundation; either version 2 of the License, or (at your
8//# option) any later version.
9//#
10//# This library is distributed in the hope that it will be useful, but WITHOUT
11//# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12//# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13//# License for more details.
14//#
15//# You should have received a copy of the GNU Library General Public License
16//# along with this library; if not, write to the Free Software Foundation,
17//# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18//#
19//# Correspondence concerning AIPS++ should be addressed as follows:
20//# Internet email: aips2-request@nrao.edu.
21//# Postal address: AIPS++ Project Office
22//# National Radio Astronomy Observatory
23//# 520 Edgemont Road
24//# Charlottesville, VA 22903-2475 USA
25//#
26//#
27//# $Id$
28
29#ifndef CASA_SORT_H
30#define CASA_SORT_H
31
32//# Includes
33#include <casacore/casa/aips.h>
34#include <casacore/casa/Arrays/ArrayFwd.h>
35#include <casacore/casa/Containers/Block.h>
36#include <casacore/casa/Utilities/ValType.h>
37#include <casacore/casa/Utilities/Compare.h>
38#include <casacore/casa/Utilities/CountedPtr.h>
39
40namespace casacore { //# NAMESPACE CASACORE - BEGIN
41
42// <summary> Define a Sort key </summary>
43// <use visibility=local>
44// <reviewed reviewer="Friso Olnon" date="1995/03/01" tests="tSort, tSort_1">
45// </reviewed>
46
47// <synopsis>
48// SortKey is a helper class for the <linkto class=Sort>Sort</linkto> class.
49// It holds the following information about a sort key:
50// <ul>
51// <li> Address of the data array containing the sort key;
52// <li> A CountedPtr to a comparison object to be used
53// (of a class derived from the abstract base class BaseCompare).
54// <li> Increment for the next data point -- this lets you specify a
55// stride for keys embedded in a struct;
56// <li> Sort order -- ascending or descending;
57// </ul>
58// </synopsis>
59
61{
62public:
63 friend class Sort;
64
65 // Define a sort key in a given data array using the indicated
66 // comparison object, stride and sort order.
67 SortKey (const void* data, const CountedPtr<BaseCompare>&,
68 uInt increment, int order);
69
70 // Copy constructor (copy semantics).
71 SortKey (const SortKey&);
72
74
75 // Assignment (copy semantics).
77
78 // Try if GenSort can be used for this single key.
79 // If it succeeds, it returns the resulting number of elements.
80 // Otherwise it returns 0.
81 uInt tryGenSort (Vector<uInt>& indexVector, uInt nrrec, int opt) const;
82 uInt64 tryGenSort (Vector<uInt64>& indexVector, uInt64 nrrec, int opt) const;
83
84 // Get the sort order.
85 int order() const
86 { return order_p; }
87
88protected:
89 // sort order; -1 = ascending, 1 = descending
91 // address of first data point
92 const void* data_p;
93 // increment for next data point
95 // comparison object; use CountedPtr for memory management
97 // comparison object; use raw pointer for performance
99};
100
101
102
103// <summary> Sort on one or more keys, ascending and/or descending </summary>
104// <use visibility=export>
105// <reviewed reviewer="Friso Olnon" date="1995/03/01" tests="tSort, tSort_1">
106// </reviewed>
107
108// <synopsis>
109// <src>Sort</src> lets you sort data on one or more keys in a mix of
110// <src>Sort::ascending</src> and <src>Sort::descending</src> order.
111// Duplicates can be skipped by giving the option
112// <src>Sort::NoDuplicates</src>. Only in this case the number of output
113// elements can be different from the number of input elements.
114// <br>The <src>unique</src> function offers another way of getting
115// unique values.
116// <p>
117// Class <src>Sort</src> does not sort the data themselves, but
118// returns an index to them. This gives more flexibility and
119// allows the sort to be stable; but it is slower.
120// <br>Very fast sorting of the data themselves can be done with the
121// functions in class <linkto class=GenSort>GenSort</linkto>.
122// If sorting on a single key with a standard data type is done,
123// Sort will use GenSortIndirect to speed up the sort.
124// <br>
125// Four sort algorithms are provided:
126// <DL>
127// <DT> <src>Sort::ParSort</src>
128// <DD> The parallel merge sort is the fastest if it can use multiple threads.
129// For a single thread it has O(n*log(n)) behaviour, but is slower
130// than quicksort.
131// A drawback is that it needs an extra index array to do the merge.
132// <DT> <src>Sort::InsSort</src>
133// <DD> Insertion sort has O(n*n) behaviour, thus is very slow for large
134// arrays. However, it is the fastest method for small arrays
135// (< 50 elements) and for arrays already (almost) in the right order.
136// <DT> <src>Sort::QuickSort</src>
137// <DD> Care has been taken to solve the well-known quicksort problems
138// like "array already in order" or "many equal elements". The
139// behaviour is O(n*log(n)) in all the cases tested, even in
140// degenerated cases where the SUN Solaris qsort algorithm is O(n*n).
141// <DT> <src>Sort::HeapSort</src>
142// <DD> Heapsort has O(n*log(n)) behaviour. Its speed is lower than
143// that of QuickSort, so QuickSort is the default algorithm.
144// </DL>
145// The default is to use QuickSort for small arrays or if only a single
146// thread can be used. Otherwise ParSort is the default.
147//
148// All sort algorithms are <em>stable</em>, which means that the original
149// order is kept when keys are equal.
150//
151// The sort is a four step process:
152// <ol>
153// <li> Construct the <src>Sort</src> object.
154// <li> Define the sort keys. The function <src>sortKey</src> must be
155// called for each sort key (the most significant one first).
156// The comparison object can be passed in directly, or a
157// <linkto group="DataType.h#DataType">basic data type</linkto>
158// can be given. In the latter case the appropriate ObjCompare
159// comparison object will be created.
160// <li> Sort the data. The function <src>sort</src> returns an index
161// array, which is allocated when needed.
162// <li> Destruct the <src>Sort</src> object (usually done automatically)
163// and delete the index array.
164// </ol>
165// The data can be in a single array of structs, in separate arrays, or
166// in a mix of those. Of course, all arrays must have the same length.
167// The data can be passed to the <src>Sort</src> constructor and/or to the
168// <src>sortKey</src> function. If passed to the <src>Sort</src> constructor,
169// the offset of the data item in the data array must be given to
170// <src>sortKey</src>.
171// </synopsis>
172
173// <example>
174// In the first example we sort the data contained in two "parallel"
175// arrays, <src>idata</src> and <src>ddata</src>, both of length
176// <src>nrdata</src>.
177// <srcblock>
178// Sort sort;
179// sort.sortKey (idata, TpInt); // define 1st sort key
180// sort.sortKey (ddata, TpDouble,0,Sort::Descending); // define 2nd sort key
181// Vector<uInt> inx;
182// sort.sort (inx, nrdata);
183// for (uInt i=0; i<nrdata; i++) { // show sorted data
184// cout << idata[inx[i]] << " " << ddata[inx[i]] << endl;
185// }
186// </srcblock>
187// Now <src>nr</src> contains the nr of records (=<src>nrdata</src>)
188// and <src>inx</src> an array of (sorted) indices.
189//
190// In the second example we sort the data stored in an array of structs
191// on the double (ascending) and the string (descending). We can pass
192// the data to the <src>Sort</src> constructor, and the offsets of the
193// struct elements to the <src>sortKey</src> function.
194// <srcblock>
195// struct Ts {
196// String as;
197// double ad;
198// }
199// Vector<uInt> inx;
200// Sort sort (tsarr, sizeof(Ts));
201// sort.sortKey ((char*)&tsarr[0].ad - (char*)tsarr, TpDouble);
202// sort.sortKey ((char*)&tsarr[0].as - (char*)tsarr, TpString,
203// Sort::Descending);
204// sort.sort (inx, nrts);
205// </srcblock>
206// Note that the first argument in function <src>sortKey</src> gives
207// the offset of the variable in the struct.
208//
209// Alternatively, and probably slightly easier, we could pass the data
210// to the <src>sortKey</src> function and use an increment:
211// <srcblock>
212// struct Ts {
213// String as;
214// double ad;
215// }
216// Vector<uInt> inx;
217// Sort sort;
218// sort.sortKey (&tsarr[0].ad, TpDouble, sizeof(Ts));
219// sort.sortKey (&tsarr[0].as, TpString, sizeof(Ts), Sort::Descending);
220// sort.sort (inx, nrts);
221// </srcblock>
222//
223// Finally, we could provide a comparison object for the struct.
224// <srcblock>
225// struct Ts {
226// String as;
227// double ad;
228// }
229// class MyCompare: public BaseCompare {
230// virtual int comp (const void* val1, const void* val2) const
231// {
232// const Ts& t1 = *(Ts*)val1;
233// const Ts& t2 = *(Ts*)val2;
234// if (t1.ad < t2.ad) return -1;
235// if (t1.ad > t2.ad) return 1;
236// if (t1.as < t2.as) return 1; // string must be descending
237// if (t1.as > t2.as) return -1;
238// return 0;
239// }
240// };
241// Vector<uInt> inx;
242// Sort sort;
243// sort.sortKey (tsarr, compareTs, sizeof(Ts));
244// sort.sort (inx, nrts);
245// </srcblock>
246
247class Sort
248{
249public:
250 // Enumerate the sort options:
251 enum Option {DefaultSort=0, // ParSort, but QuickSort for small array
252 HeapSort=1, // use Heapsort algorithm
253 InsSort=2, // use insertion sort algorithm
254 QuickSort=4, // use Quicksort algorithm
255 ParSort=8, // use parallel merge sort algorithm
256 NoDuplicates=16}; // skip data with equal sort keys
257
258 // Enumerate the sort order:
259 enum Order {Ascending=-1,
261
262 // The default constructor can be used when the data is only passed
263 // in via function <src>sortKey</src>.
265
266 // Construct a Sort object for the given data array with elements
267 // of <src>elementSize</src> bytes. This data array will be used
268 // when an offset is given to the <src>sortKey</src> functions.
269 // You can still pass additional data arrays to the
270 // <src>sortKey</src> functions.
271 Sort (const void* data, uInt elementSize);
272
273 // Copy constructor (copy semantics).
274 Sort (const Sort&);
275
277
278 // Assignment (copy semantics).
280
281 // Define a sort key (the most significant key should be defined first).
282 // The key contains:
283 // <ul>
284 // <li> A pointer to the start of the data array. --- When structs are
285 // sorted on an element in the struct, the pointer must point to
286 // that element in the first struct.
287 // <li> A pointer to the comparison object to be used. --- The
288 // comparison object can be specified in two ways:
289 // <ul>
290 // <li> by giving a
291 // <linkto group="DataType.h#DataType">basic data type</linkto>,
292 // in which case the appropriate comparison object will be
293 // created automatically, or
294 // <li> by a CountedPtr of a comparison object.
295 // You may want to use the templated comparison classes
296 // <linkto class=ObjCompare>ObjCompare</linkto>(),
297 // but you are free to use any other class derived from BaseCompare
298 // that implements the <src>comp</src> function.
299 // </ul>
300 // <li> The increment from one data element to the next. --- When
301 // structs are sorted on an element in the struct, the increment
302 // should be the size of the struct. If the comparison object is
303 // automatically created using the data type specified, the default
304 // increment is the size of the data type.
305 // <li> The sort order. --- <src>Ascending</src> (default) or
306 // <src>Descending</src>;
307 // </ul>
308 //
309 // When the data array has been passed to the Sort constructor,
310 // the data pointer and the increment arguments can be replaced by a
311 // single argument: the offset of the key in each element of the array.
312 //
313 // <group>
314 void sortKey (const void* data, DataType, uInt increment = 0,
315 Order = Ascending);
316 void sortKey (const void* data, const CountedPtr<BaseCompare>&,
317 uInt increment, Order = Ascending);
318 void sortKey (uInt offset, DataType, Order = Ascending);
320 Order = Ascending);
321 // </group>
322
323 // Sort the data array of <src>nrrec</src> records.
324 // The result is an array of indices giving the requested order.
325 // It returns the number of resulting records. The indices array
326 // is resized to that number.
327 // <br> By default it'll try if the faster GenSortIndirect can be used
328 // if a sort on a single key is used.
329 uInt sort (Vector<uInt>& indexVector, uInt nrrec,
330 int options = DefaultSort, Bool tryGenSort = True) const;
331 uInt64 sort (Vector<uInt64>& indexVector, uInt64 nrrec,
332 int options = DefaultSort, Bool tryGenSort = True) const;
333
334 // Get all unique records in a sorted array. The array order is
335 // given in the indexVector (as possibly returned by the sort function).
336 // The default indexVector is 0..nrrec-1.
337 // The index of each first unique record is returned in the uniqueVector.
338 // They are indices in the supplied indexVector, so
339 // <src>data[indexVector(uniqueVector(i))]</src>
340 // is giving the i-th unique record.
341 // Note that the records indexed by <src>indexVector(uniqueVector(i))</src>
342 // till <src>indexVector(uniqueVector(i+1))</src> are all the same.
343 // <br>
344 // It returns the number of unique records. The unique array
345 // is resized to that number.
346 // The third version also gives back a vector with the keys that
347 // change in each sorting group. The size of changeKey is the same as
348 // uniqueVector, and for each unique sorting group indicates the index
349 // of the keyword that will change at the end of the group.
350 // <group>
351 uInt unique (Vector<uInt>& uniqueVector, uInt nrrec) const;
352 uInt unique (Vector<uInt>& uniqueVector,
353 const Vector<uInt>& indexVector) const;
354 uInt unique (Vector<uInt>& uniqueVector,
355 Vector<size_t>& changeKey,
356 const Vector<uInt>& indexVector) const;
357 uInt64 unique (Vector<uInt64>& uniqueVector, uInt64 nrrec) const;
359 const Vector<uInt64>& indexVector) const;
361 Vector<size_t>& changeKey,
362 const Vector<uInt64>& indexVector) const;
363 // </group>
364
365private:
366 template<typename T>
367 T doSort (Vector<T>& indexVector, T nrrec,
368 int options = DefaultSort, Bool tryGenSort = True) const;
369
370 template <typename T>
371 T doUnique (Vector<T>& uniqueVector, T nrrec) const;
372 template <typename T>
373 T doUnique (Vector<T>& uniqueVector, const Vector<T>& indexVector) const;
374 template <typename T>
375 T doUnique (Vector<T>& uniqueVector, Vector<size_t>& changeKey,
376 const Vector<T>& indexVector) const;
377
378 // Copy that Sort object to this.
379 void copy (const Sort& that);
380
381 // Add a sort key giving a data type and stride or the sort key.
382 // <group>
383 void addKey (const void* data, DataType, uInt increment, int options);
385 // </group>
386
387 // Do an insertion sort, optionally skipping duplicates.
388 // <group>
389 template<typename T>
390 T insSort (T nr, T* indices) const;
391 template<typename T>
392 T insSortNoDup (T nr, T* indices) const;
393 // </group>
394
395 // Do a merge sort, if possible in parallel using OpenMP.
396 // Note that the env.var. OMP_NUM_TRHEADS sets the maximum nr of threads
397 // to use. It defaults to the number of cores.
398 template<typename T>
399 T parSort (int nthr, T nrrec, T* inx) const;
400 template<typename T>
401 void merge (T* inx, T* tmp, T size, T* index,
402 T nparts) const;
403
404 // Do a quicksort, optionally skipping duplicates
405 // (qkSort is the actual quicksort function).
406 // <group>
407 template<typename T>
408 T quickSort (T nr, T* indices) const;
409 template<typename T>
410 T quickSortNoDup (T nr, T* indices) const;
411 template<typename T>
412 void qkSort (T nr, T* indices) const;
413 // </group>
414
415 // Do a heapsort, optionally skipping duplicates.
416 // <group>
417 template<typename T>
418 T heapSort (T nr, T* indices) const;
419 template<typename T>
420 T heapSortNoDup (T nr, T* indices) const;
421 // </group>
422
423 // Siftdown algorithm for heapsort.
424 template<typename T>
425 void siftDown (T low, T up, T* indices) const;
426
427 // Compare 2 records based on the comparison functions
428 template<typename T>
429 int compare (T index1, T index2) const;
430
431 // As compare() but it also gives back the index of the first comparison
432 // function that didn't match.
433 template<typename T>
434 int compareChangeIdx(T i1, T i2, size_t& idxComp) const;
435
436 // Swap 2 indices.
437 template<typename T>
438 inline void swap (T index1, T index2, T* indices) const
439 {
440 T t = indices[index1];
441 indices[index1] = indices[index2];
442 indices[index2] = t;
443 }
444
445 //# Data memebers
446 PtrBlock<SortKey*> keys_p; //# keys to sort on
447 size_t nrkey_p; //# #sort-keys
448 const void* data_p; //# pointer to data records
449 uInt size_p; //# size of data record
450 int order_p; //# -1=asc 0=mixed 1=desc
451};
452
453
454} //# NAMESPACE CASACORE - END
455
456#endif
abstract base class for comparing two objects
Definition Compare.h:65
Referenced counted pointer for constant data.
Definition CountedPtr.h:81
A drop-in replacement for Block<T*>.
Definition Block.h:814
int order() const
Get the sort order.
Definition Sort.h:85
uInt tryGenSort(Vector< uInt > &indexVector, uInt nrrec, int opt) const
Try if GenSort can be used for this single key.
BaseCompare * cmpObj_p
comparison object; use raw pointer for performance
Definition Sort.h:98
SortKey(const void *data, const CountedPtr< BaseCompare > &, uInt increment, int order)
Define a sort key in a given data array using the indicated comparison object, stride and sort order.
CountedPtr< BaseCompare > ccmpObj_p
comparison object; use CountedPtr for memory management
Definition Sort.h:96
uInt64 tryGenSort(Vector< uInt64 > &indexVector, uInt64 nrrec, int opt) const
SortKey & operator=(const SortKey &)
Assignment (copy semantics).
SortKey(const SortKey &)
Copy constructor (copy semantics).
const void * data_p
address of first data point
Definition Sort.h:92
int order_p
sort order; -1 = ascending, 1 = descending
Definition Sort.h:90
uInt incr_p
increment for next data point
Definition Sort.h:94
Sort on one or more keys, ascending and/or descending.
Definition Sort.h:248
uInt64 unique(Vector< uInt64 > &uniqueVector, const Vector< uInt64 > &indexVector) const
Option
Enumerate the sort options:
Definition Sort.h:251
T heapSortNoDup(T nr, T *indices) const
const void * data_p
Definition Sort.h:448
void addKey(const void *data, DataType, uInt increment, int options)
Add a sort key giving a data type and stride or the sort key.
uInt64 sort(Vector< uInt64 > &indexVector, uInt64 nrrec, int options=DefaultSort, Bool tryGenSort=True) const
int compareChangeIdx(T i1, T i2, size_t &idxComp) const
As compare() but it also gives back the index of the first comparison function that didn't match.
T parSort(int nthr, T nrrec, T *inx) const
Do a merge sort, if possible in parallel using OpenMP.
void sortKey(uInt offset, const CountedPtr< BaseCompare > &, Order=Ascending)
uInt size_p
Definition Sort.h:449
T doUnique(Vector< T > &uniqueVector, Vector< size_t > &changeKey, const Vector< T > &indexVector) const
size_t nrkey_p
Definition Sort.h:447
void copy(const Sort &that)
Copy that Sort object to this.
uInt unique(Vector< uInt > &uniqueVector, uInt nrrec) const
Get all unique records in a sorted array.
T insSort(T nr, T *indices) const
Do an insertion sort, optionally skipping duplicates.
uInt unique(Vector< uInt > &uniqueVector, const Vector< uInt > &indexVector) const
T doSort(Vector< T > &indexVector, T nrrec, int options=DefaultSort, Bool tryGenSort=True) const
uInt unique(Vector< uInt > &uniqueVector, Vector< size_t > &changeKey, const Vector< uInt > &indexVector) const
T quickSort(T nr, T *indices) const
Do a quicksort, optionally skipping duplicates (qkSort is the actual quicksort function).
Sort & operator=(const Sort &)
Assignment (copy semantics).
void merge(T *inx, T *tmp, T size, T *index, T nparts) const
int compare(T index1, T index2) const
Compare 2 records based on the comparison functions.
T heapSort(T nr, T *indices) const
Do a heapsort, optionally skipping duplicates.
void sortKey(uInt offset, DataType, Order=Ascending)
void sortKey(const void *data, DataType, uInt increment=0, Order=Ascending)
Define a sort key (the most significant key should be defined first).
uInt64 unique(Vector< uInt64 > &uniqueVector, uInt64 nrrec) const
Order
Enumerate the sort order:
Definition Sort.h:259
T quickSortNoDup(T nr, T *indices) const
void swap(T index1, T index2, T *indices) const
Swap 2 indices.
Definition Sort.h:438
T doUnique(Vector< T > &uniqueVector, const Vector< T > &indexVector) const
Sort(const void *data, uInt elementSize)
Construct a Sort object for the given data array with elements of elementSize bytes.
T doUnique(Vector< T > &uniqueVector, T nrrec) const
uInt sort(Vector< uInt > &indexVector, uInt nrrec, int options=DefaultSort, Bool tryGenSort=True) const
Sort the data array of nrrec records.
uInt64 unique(Vector< uInt64 > &uniqueVector, Vector< size_t > &changeKey, const Vector< uInt64 > &indexVector) const
T insSortNoDup(T nr, T *indices) const
void addKey(SortKey *)
Sort()
The default constructor can be used when the data is only passed in via function sortKey.
PtrBlock< SortKey * > keys_p
Definition Sort.h:446
Sort(const Sort &)
Copy constructor (copy semantics).
void qkSort(T nr, T *indices) const
void sortKey(const void *data, const CountedPtr< BaseCompare > &, uInt increment, Order=Ascending)
int order_p
Definition Sort.h:450
void siftDown(T low, T up, T *indices) const
Siftdown algorithm for heapsort.
this file contains all the compiler specific defines
Definition mainpage.dox:28
unsigned int uInt
Definition aipstype.h:51
bool Bool
Define the standard types used by Casacore.
Definition aipstype.h:42
const Bool True
Definition aipstype.h:43
unsigned long long uInt64
Definition aipsxtype.h:39