001/*****************************************************************************
002 * Copyright by The HDF Group.                                               *
003 * Copyright by the Board of Trustees of the University of Illinois.         *
004 * All rights reserved.                                                      *
005 *                                                                           *
006 * This file is part of the HDF Java Products distribution.                  *
007 * The full copyright notice, including terms governing use, modification,   *
008 * and redistribution, is contained in the files COPYING and Copyright.html. *
009 * COPYING can be found at the root of the source code distribution tree.    *
010 * Or, see http://hdfgroup.org/products/hdf-java/doc/Copyright.html.         *
011 * If you do not have access to either file, you may request a copy from     *
012 * help@hdfgroup.org.                                                        *
013 ****************************************************************************/
014
015package hdf.object;
016
017import java.lang.reflect.Array;
018import java.util.Vector;
019
020/**
 * The abstract class provides general APIs to create and manipulate dataset
 * objects, and to retrieve dataset properties such as datatype and dimension
 * sizes.
 * <p>
 * This class provides two convenience functions, read() and write(), to read
 * and write data values. Reading or writing data may take many library calls if
 * the library APIs are used directly. The read() and write() functions hide the
 * details of these calls from users.
028 * 
029 * @see hdf.object.ScalarDS
030 * @see hdf.object.CompoundDS
031 * 
032 * @version 1.1 9/4/2007
033 * @author Peter X. Cao
034 */
035public abstract class Dataset extends HObject {
036    /**
037     * 
038     */
039    private static final long serialVersionUID    = -3360885430038261178L;
040
041    private final static org.slf4j.Logger log = org.slf4j.LoggerFactory.getLogger(Dataset.class);
042
043    /**
044     * The memory buffer that holds the raw data of the dataset.
045     */
046    protected Object          data;
047
048    /**
049     * The number of dimensions of the dataset.
050     */
051    protected int             rank;
052
053    /**
054     * The current dimension sizes of the dataset
055     */
056    protected long[]          dims;
057
058    /**
059     * The max dimension sizes of the dataset
060     */
061    protected long[]          maxDims;
062
063    /**
064     * Array that contains the number of data points selected (for read/write)
065     * in each dimension.
066     * <p>
067     * The select size must be less than or equal to the current dimension size.
068     * A subset of a rectangle selection is defined by the starting position and
069     * selected sizes.
070     * <p>
071     * For example, a 4 X 5 dataset
072     * 
073     * <pre>
074     *     0,  1,  2,  3,  4
075     *    10, 11, 12, 13, 14
076     *    20, 21, 22, 23, 24
077     *    30, 31, 32, 33, 34
078     * long[] dims = {4, 5};
079     * long[] startDims = {1, 2};
080     * long[] selectedDims = {3, 3};
081     * then the following subset is selected by the startDims and selectedDims above
082     *     12, 13, 14
083     *     22, 23, 24
     *     32, 33, 34
     * </pre>
085     */
086    protected long[]          selectedDims;
087
088    /**
089     * The starting position of each dimension of a selected subset. With both
090     * the starting position and selected sizes, the subset of a rectangle
091     * selection is fully defined.
092     */
093    protected long[]          startDims;
094
095    /**
096     * Array that contains the indices of the dimensions selected for display.
097     * <p>
     * <B>selectedIndex[] is provided for two purposes:</B>
099     * <OL>
100     * <LI>
101     * selectedIndex[] is used to indicate the order of dimensions for display,
102     * i.e. selectedIndex[0] = row, selectedIndex[1] = column and
103     * selectedIndex[2] = depth. For example, for a four dimension dataset, if
104     * selectedIndex[] is {1, 2, 3}, then dim[1] is selected as row index,
105     * dim[2] is selected as column index and dim[3] is selected as depth index.
106     * <LI>
107     * selectedIndex[] is also used to select dimensions for display for
108     * datasets with three or more dimensions. We assume that applications such
109     * as HDFView can only display data up to three dimensions (a 2D
110     * spreadsheet/image with a third dimension that the 2D spreadsheet/image is
111     * cut from). For dataset with more than three dimensions, we need
112     * selectedIndex[] to store which three dimensions are chosen for display.
113     * For example, for a four dimension dataset, if selectedIndex[] = {1, 2, 3},
114     * then dim[1] is selected as row index, dim[2] is selected as column index
115     * and dim[3] is selected as depth index. dim[0] is not selected. Its
116     * location is fixed at 0 by default.
117     * </OL>
118     */
119    protected final int[]     selectedIndex;
120
121    /**
122     * The number of elements to move from the start location in each dimension.
123     * For example, if selectedStride[0] = 2, every other data point is selected
124     * along dim[0].
125     */
126    protected long[]          selectedStride;
127
128    /**
129     * The array of dimension sizes for a chunk.
130     */
131    protected long[]          chunkSize;
132
133    /** The compression information. */
134    protected String          compression;
135    public final static String          compression_gzip_txt = "GZIP: level = ";
136
137    /** The filters information. */
138    protected String          filters;
139
140    /** The storage information. */
141    protected String          storage;
142
143    /** The datatype object of the dataset. */
144    protected Datatype        datatype;
145
146    /**
147     * Array of strings that represent the dimension names. It is null if
148     * dimension names do not exist.
149     */
150    protected String[]        dimNames;
151
152    /** Flag to indicate if the byte[] array is converted to strings */
153    protected boolean         convertByteToString = true;
154
155    /** Flag to indicate if data values are loaded into memory. */
156    protected boolean         isDataLoaded        = false;
157
158    /** The number of data points in the memory buffer. */
159    protected long            nPoints             = 1;
160
161    /**
162     * The data buffer that contains the raw data directly reading from file
163     * (before any data conversion).
164     */
165    protected Object          originalBuf         = null;
166
167    /**
168     * The array that holds the converted data of unsigned C-type integers.
169     * <p>
     * For example, suppose that the original data is an array of unsigned
     * 16-bit short integers. Since Java does not support unsigned integers, the
     * data is converted to an array of 32-bit signed integers. In that case, the
     * converted buffer is the array of 32-bit signed integers.
174     */
175    protected Object          convertedBuf        = null;
176
177    /**
178     * Flag to indicate if the enum data is converted to strings.
179     */
180    protected boolean         enumConverted       = false;
181
182    /**
183     * Constructs a Dataset object with a given file, name and path.
184     * <p>
185     * 
186     * @param theFile
187     *            the file that contains the dataset.
188     * @param name
189     *            the name of the Dataset, e.g. "dset1".
190     * @param path
191     *            the full group path of this Dataset, e.g. "/arrays/".
192     */
193    public Dataset(FileFormat theFile, String name, String path) {
194        this(theFile, name, path, null);
195    }
196
197    /**
198     * @deprecated Not for public use in the future. <br>
199     *             Using {@link #Dataset(FileFormat, String, String)}
200     */
201    @Deprecated
202    public Dataset(FileFormat theFile, String name, String path, long[] oid) {
203        super(theFile, name, path, oid);
204
205        rank = 0;
206        data = null;
207        dims = null;
208        maxDims = null;
209        selectedDims = null;
210        startDims = null;
211        selectedStride = null;
212        chunkSize = null;
213        compression = "NONE";
214        filters = "NONE";
215        storage = "NONE";
216        dimNames = null;
217
218        selectedIndex = new int[3];
219        selectedIndex[0] = 0;
220        selectedIndex[1] = 1;
221        selectedIndex[2] = 2;
222    }
223
224    /**
225     * Clears memory held by the dataset, such as data buffer.
226     */
227    public void clear() {
228        if (data != null) {
229            if (data instanceof Vector) {
230                ((Vector) data).setSize(0);
231            }
232            data = null;
233            originalBuf = null;
234            convertedBuf = null;
235        }
236        isDataLoaded = false;
237    }
238
239    /**
240     * Retrieves datatype and dataspace information from file and sets the
241     * dataset in memory.
242     * <p>
243     * The init() is designed to support lazy operation in dataset object. When
244     * a data object is retrieved from file, the datatype, dataspace and raw
245     * data are not loaded into memory. When it is asked to read the raw data
246     * from file, init() is first called to get the datatype and dataspace
247     * information, then load the raw data from file.
248     * <p>
     * init() is also used to reset the selection of a dataset (start, stride and
     * count) to the default, which is the entire dataset for 1D or 2D datasets.
     * In the following example, init() at step 1) retrieves datatype and
     * dataspace information from file. getData() at step 3) reads only one data
     * point. init() at step 4) resets the selection to the whole dataset.
     * getData() at step 6) reads the values of the whole dataset into memory.
255     * 
256     * <pre>
257     * dset = (Dataset) file.get(NAME_DATASET);
258     * 
259     * // 1) get datatype and dataspace information from file
260     * dset.init();
261     * rank = dset.getRank(); // rank = 2, a 2D dataset
262     * count = dset.getSelectedDims();
263     * start = dset.getStartDims();
264     * dims = dset.getDims();
265     * 
266     * // 2) select only one data point
267     * for (int i = 0; i &lt; rank; i++) {
     *     start[i] = 0;
269     *     count[i] = 1;
270     * }
271     * 
272     * // 3) read one data point
273     * data = dset.getData();
274     * 
275     * // 4) reset to select the whole dataset
276     * dset.init();
277     * 
278     * // 5) clean the memory data buffer
279     * dset.clearData();
280     * 
281     * // 6) Read the whole dataset
282     * data = dset.getData();
283     * </pre>
284     */
285    public abstract void init();
286
287    /**
288     * Returns the rank (number of dimensions) of the dataset.
289     * 
290     * @return the number of dimensions of the dataset.
291     */
292    public final int getRank() {
293        if (rank < 0) init();
294
295        return rank;
296    }
297
298    /**
299     * Returns the array that contains the dimension sizes of the dataset.
300     * 
301     * @return the dimension sizes of the dataset.
302     */
303    public final long[] getDims() {
304        if (rank < 0) init();
305
306        return dims;
307    }
308
309    /**
310     * Returns the array that contains the max dimension sizes of the dataset.
311     * 
312     * @return the max dimension sizes of the dataset.
313     */
314    public final long[] getMaxDims() {
315        if (rank < 0) init();
316
317        if (maxDims == null) return dims;
318
319        return maxDims;
320    }
321
322    /**
323     * Returns the dimension sizes of the selected subset.
324     * <p>
325     * The SelectedDims is the number of data points of the selected subset.
326     * Applications can use this array to change the size of selected subset.
327     * 
328     * The select size must be less than or equal to the current dimension size.
329     * Combined with the starting position, selected sizes and stride, the
330     * subset of a rectangle selection is fully defined.
331     * <p>
332     * For example, a 4 X 5 dataset
333     * 
334     * <pre>
335     *     0,  1,  2,  3,  4
336     *    10, 11, 12, 13, 14
337     *    20, 21, 22, 23, 24
338     *    30, 31, 32, 33, 34
339     * long[] dims = {4, 5};
340     * long[] startDims = {1, 2};
341     * long[] selectedDims = {3, 3};
342     * long[] selectedStride = {1, 1};
343     * then the following subset is selected by the startDims and selectedDims
344     *     12, 13, 14
345     *     22, 23, 24
346     *     32, 33, 34
347     * </pre>
348     * 
349     * @return the dimension sizes of the selected subset.
350     */
351    public final long[] getSelectedDims() {
352        if (rank < 0) init();
353
354        return selectedDims;
355    }
356
357    /**
358     * Returns the starting position of a selected subset.
359     * <p>
360     * Applications can use this array to change the starting position of a
361     * selection. Combined with the selected dimensions, selected sizes and
362     * stride, the subset of a rectangle selection is fully defined.
363     * <p>
364     * For example, a 4 X 5 dataset
365     * 
366     * <pre>
367     *     0,  1,  2,  3,  4
368     *    10, 11, 12, 13, 14
369     *    20, 21, 22, 23, 24
370     *    30, 31, 32, 33, 34
371     * long[] dims = {4, 5};
372     * long[] startDims = {1, 2};
373     * long[] selectedDims = {3, 3};
374     * long[] selectedStride = {1, 1};
375     * then the following subset is selected by the startDims and selectedDims
376     *     12, 13, 14
377     *     22, 23, 24
378     *     32, 33, 34
379     * </pre>
380     * 
381     * @return the starting position of a selected subset.
382     */
383    public final long[] getStartDims() {
384        if (rank < 0) init();
385
386        return startDims;
387    }
388
389    /**
390     * Returns the selectedStride of the selected dataset.
391     * <p>
392     * Applications can use this array to change how many elements to move in
393     * each dimension.
394     * 
395     * Combined with the starting position and selected sizes, the subset of a
396     * rectangle selection is defined.
397     * <p>
398     * For example, a 4 X 5 dataset
399     * 
400     * <pre>
401     *     0,  1,  2,  3,  4
402     *    10, 11, 12, 13, 14
403     *    20, 21, 22, 23, 24
404     *    30, 31, 32, 33, 34
405     * long[] dims = {4, 5};
406     * long[] startDims = {0, 0};
407     * long[] selectedDims = {2, 2};
408     * long[] selectedStride = {2, 3};
409     * then the following subset is selected by the startDims and selectedDims
410     *     0,   3
411     *     20, 23
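     * </pre>
     *
     * @return the selected stride of each dimension, or null if the rank of
     *         the dataset is not positive.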
413     */
414    public final long[] getStride() {
415        if (rank < 0) init();
416
417        if (rank <= 0) {
418            return null;
419        }
420
421        if (selectedStride == null) {
422            selectedStride = new long[rank];
423            for (int i = 0; i < rank; i++) {
424                selectedStride[i] = 1;
425            }
426        }
427
428        return selectedStride;
429    }
430
431    /**
432     * Sets the flag that indicates if a byte array is converted to a string
433     * array.
434     * <p>
435     * In a string dataset, the raw data from file is stored in a byte array. By
436     * default, this byte array is converted to an array of strings. For a large
437     * dataset (e.g. more than one million strings), the converson takes a long
438     * time and requires a lot of memory space to store the strings. At some
439     * applications, such a conversion can be delayed. For example, A GUI
440     * application may convert only part of the strings that are visible to the
441     * users, not the entire data array.
442     * <p>
443     * setConvertByteToString(boolean b) allows users to set the flag so that
444     * applications can choose to perform the byte-to-string conversion or not.
445     * If the flag is set to false, the getData() returns a array of byte
446     * instead of an array of strings.
447     * 
448     * @param b
449     *            convert bytes to strings if b is true; otherwise, if false, do
450     *            not convert bytes to strings.
451     */
452    public final void setConvertByteToString(boolean b) {
453        convertByteToString = b;
454    }
455
456    /**
457     * Returns the flag that indicates if a byte array is converted to a string
458     * array..
459     * 
460     * @return true if byte array is converted to string; otherwise, returns
461     *         false if there is no conversion.
462     */
463    public final boolean getConvertByteToString() {
464        return convertByteToString;
465    }
466
467    /**
468     * Reads the data from file.
469     * <p>
470     * read() reads the data from file to a memory buffer and returns the memory
471     * buffer. The dataset object does not hold the memory buffer. To store the
472     * memory buffer in the dataset object, one must call getData().
473     * <p>
474     * By default, the whole dataset is read into memory. Users can also select
475     * subset to read. Subsetting is done in an implicit way.
476     * <p>
477     * <b>How to Select a Subset</b>
478     * <p>
479     * A selection is specified by three arrays: start, stride and count.
480     * <ol>
481     * <li>start: offset of a selection
482     * <li>stride: determining how many elements to move in each dimension
483     * <li>count: number of elements to select in each dimension
484     * </ol>
     * getStartDims(), getStride() and getSelectedDims() return the start,
     * stride and count arrays respectively. Applications can make a selection
     * by changing the values of the arrays.
488     * <p>
489     * The following example shows how to make a subset. In the example, the
490     * dataset is a 4-dimensional array of [200][100][50][10], i.e. dims[0]=200;
491     * dims[1]=100; dims[2]=50; dims[3]=10; <br>
492     * We want to select every other data point in dims[1] and dims[2]
493     * 
494     * <pre>
495     * int rank = dataset.getRank(); // number of dimension of the dataset
496     * long[] dims = dataset.getDims(); // the dimension sizes of the dataset
497     * long[] selected = dataset.getSelectedDims(); // the selected size of the dataset
     * long[] start = dataset.getStartDims(); // the offset of the selection
499     * long[] stride = dataset.getStride(); // the stride of the dataset
500     * int[] selectedIndex = dataset.getSelectedIndex(); // the selected dimensions for
501     *                                                   // display
502     * 
     * // select dim1 and dim2 as 2D data for display, and slice through dim0
     * selectedIndex[0] = 1;
     * selectedIndex[1] = 2;
     * selectedIndex[2] = 0;
507     * 
508     * // reset the selection arrays
509     * for (int i = 0; i &lt; rank; i++) {
510     *     start[i] = 0;
511     *     selected[i] = 1;
512     *     stride[i] = 1;
513     * }
514     * 
515     * // set stride to 2 on dim1 and dim2 so that every other data points are
516     * // selected.
517     * stride[1] = 2;
518     * stride[2] = 2;
519     * 
     * // set the selection size of dim1 and dim2
     * selected[1] = dims[1] / stride[1];
     * selected[2] = dims[2] / stride[2];
523     * 
524     * // when dataset.getData() is called, the selection above will be used since
525     * // the dimension arrays are passed by reference. Changes of these arrays
526     * // outside the dataset object directly change the values of these array
527     * // in the dataset object.
528     * </pre>
529     * <p>
     * For ScalarDS, the memory data buffer is a one-dimensional array of byte,
     * short, int, float, double or String type based on the datatype of the
     * dataset.
     * <p>
     * For CompoundDS, the memory data object is a java.util.List object. Each
     * element of the list is a data array that corresponds to a compound field.
536     * <p>
537     * For example, if compound dataset "comp" has the following nested
538     * structure, and member datatypes
539     * 
540     * <pre>
541     * comp --> m01 (int)
542     * comp --> m02 (float)
543     * comp --> nest1 --> m11 (char)
544     * comp --> nest1 --> m12 (String)
545     * comp --> nest1 --> nest2 --> m21 (long)
546     * comp --> nest1 --> nest2 --> m22 (double)
547     * </pre>
548     * 
549     * getData() returns a list of six arrays: {int[], float[], char[],
550     * String[], long[] and double[]}.
551     * 
552     * @return the data read from file.
553     * 
554     * @see #getData()
555     */
556    public abstract Object read() throws Exception, OutOfMemoryError;
557
558    /**
559     * Reads the raw data of the dataset from file to a byte array.
560     * <p>
     * readBytes() reads raw data to an array of bytes instead of an array of its
     * datatype. For example, for a one-dimensional 32-bit integer dataset of
     * size 5, readBytes() returns a byte array of size 20 instead of an
     * int array of 5.
565     * <p>
566     * readBytes() can be used to copy data from one dataset to another
567     * efficiently because the raw data is not converted to its native type, it
568     * saves memory space and CPU time.
569     * 
570     * @return the byte array of the raw data.
571     */
572    public abstract byte[] readBytes() throws Exception;
573
574    /**
575     * Writes a memory buffer to the dataset in file.
576     * 
577     * @param buf
578     *            the data to write
579     */
580    public abstract void write(Object buf) throws Exception;
581
582    /**
583     * Writes the memory buffer of this dataset to file.
584     */
585    public final void write() throws Exception {
586        if (data != null) {
587            write(data);
588        }
589    }
590
591    /**
592     * Creates a new dataset and writes the data buffer to the new dataset.
593     * <p>
594     * This function allows applications to create a new dataset for a given
595     * data buffer. For example, users can select a specific interesting part
596     * from a large image and create a new image with the selection.
597     * <p>
598     * The new dataset retains the datatype and dataset creation properties of
599     * this dataset.
600     * 
601     * @param pgroup
602     *            the group which the dataset is copied to.
603     * @param name
604     *            the name of the new dataset.
605     * @param dims
     *            the dimension sizes of the new dataset.
607     * @param data
608     *            the data values of the subset to be copied.
609     * 
610     * @return the new dataset.
611     */
612    public abstract Dataset copy(Group pgroup, String name, long[] dims, Object data) throws Exception;
613
614    /**
615     * Returns the datatype object of the dataset.
616     * 
617     * @return the datatype object of the dataset.
618     */
619    public abstract Datatype getDatatype();
620
621    /**
622     * Returns the data buffer of the dataset in memory.
623     * <p>
624     * If data is already loaded into memory, returns the data; otherwise, calls
625     * read() to read data from file into a memory buffer and returns the memory
626     * buffer.
     * <p>
629     * By default, the whole dataset is read into memory. Users can also select
630     * subset to read. Subsetting is done in an implicit way.
631     * <p>
632     * <b>How to Select a Subset</b>
633     * <p>
634     * A selection is specified by three arrays: start, stride and count.
635     * <ol>
636     * <li>start: offset of a selection
637     * <li>stride: determining how many elements to move in each dimension
638     * <li>count: number of elements to select in each dimension
639     * </ol>
     * getStartDims(), getStride() and getSelectedDims() return the start,
     * stride and count arrays respectively. Applications can make a selection
     * by changing the values of the arrays.
643     * <p>
644     * The following example shows how to make a subset. In the example, the
645     * dataset is a 4-dimensional array of [200][100][50][10], i.e. dims[0]=200;
646     * dims[1]=100; dims[2]=50; dims[3]=10; <br>
647     * We want to select every other data point in dims[1] and dims[2]
648     * 
649     * <pre>
650     * int rank = dataset.getRank(); // number of dimension of the dataset
651     * long[] dims = dataset.getDims(); // the dimension sizes of the dataset
     * long[] selected = dataset.getSelectedDims(); // the selected size of the dataset
     * long[] start = dataset.getStartDims(); // the offset of the selection
654     * long[] stride = dataset.getStride(); // the stride of the dataset
655     * int[] selectedIndex = dataset.getSelectedIndex(); // the selected dimensions for
656     *                                                   // display
657     * 
     * // select dim1 and dim2 as 2D data for display, and slice through dim0
     * selectedIndex[0] = 1;
     * selectedIndex[1] = 2;
     * selectedIndex[2] = 0;
662     * 
663     * // reset the selection arrays
664     * for (int i = 0; i &lt; rank; i++) {
665     *     start[i] = 0;
666     *     selected[i] = 1;
667     *     stride[i] = 1;
668     * }
669     * 
670     * // set stride to 2 on dim1 and dim2 so that every other data points are
671     * // selected.
672     * stride[1] = 2;
673     * stride[2] = 2;
674     * 
     * // set the selection size of dim1 and dim2
     * selected[1] = dims[1] / stride[1];
     * selected[2] = dims[2] / stride[2];
     *
     * // when dataset.getData() is called, the selection above will be used since
680     * // the dimension arrays are passed by reference. Changes of these arrays
681     * // outside the dataset object directly change the values of these array
682     * // in the dataset object.
683     * </pre>
684     * <p>
     * For ScalarDS, the memory data buffer is a one-dimensional array of byte,
     * short, int, float, double or String type based on the datatype of the
     * dataset.
     * <p>
     * For CompoundDS, the memory data object is a java.util.List object. Each
     * element of the list is a data array that corresponds to a compound field.
691     * <p>
692     * For example, if compound dataset "comp" has the following nested
     * structure, and member datatypes
694     * 
695     * <pre>
696     * comp --> m01 (int)
697     * comp --> m02 (float)
698     * comp --> nest1 --> m11 (char)
699     * comp --> nest1 --> m12 (String)
700     * comp --> nest1 --> nest2 --> m21 (long)
701     * comp --> nest1 --> nest2 --> m22 (double)
702     * </pre>
703     * 
704     * getData() returns a list of six arrays: {int[], float[], char[],
705     * String[], long[] and double[]}.
706     * 
707     * @return the memory buffer of the dataset.
708     */
709    public final Object getData() throws Exception, OutOfMemoryError {
710        if (!isDataLoaded) {
711            log.trace("getData: read");
712            data = read(); // load the data;
713            originalBuf = data;
714            isDataLoaded = true;
715            nPoints = 1;
716            for (int j = 0; j < selectedDims.length; j++) {
717                nPoints *= selectedDims[j];
718            }
719            log.trace("getData: read {}", nPoints);
720        }
721
722        return data;
723    }
724
725    /**
726     * @deprecated Not for public use in the future.
727     *             <p>
728     *             setData() is not safe to use because it changes memory buffer
729     *             of the dataset object. Dataset operation such as write/read
730     *             will fail if the buffer type or size is changed.
731     */
732    @Deprecated
733    public final void setData(Object d) {
734        data = d;
735    }
736
737    /**
738     * Clears the data buffer in memory and to force the next read() to load
739     * data from file.
740     * <p>
741     * The function read() loads data from file into memory only if the data is
742     * not read. If data is already in memory, read() just returns the memory
743     * buffer. Sometimes we want to force read() to re-read data from file. For
744     * example, when the selection is changed, we need to re-read the data.
745     * 
746     * clearData() clears the current memory buffer and force the read() to load
747     * the data from file.
748     * 
749     * @see #getData()
750     * @see #read()
751     */
752    public void clearData() {
753        isDataLoaded = false;
754    }
755
756    /**
757     * Returns the dimension size of the vertical axis.
758     * 
759     * <p>
760     * This function is used by GUI applications such as HDFView. GUI
761     * applications display a dataset in a 2D table or 2D image. The display
762     * order is specified by the index array of selectedIndex as follow:
763     * <dl>
764     * <dt>selectedIndex[0] -- height</dt>
765     * <dd>The vertical axis</dd>
766     * <dt>selectedIndex[1] -- width</dt>
767     * <dd>The horizontal axis</dd>
768     * <dt>selectedIndex[2] -- depth</dt>
769     * <dd>The depth axis is used for 3 or more dimensional datasets.</dd>
770     * </dl>
771     * Applications can use getSelectedIndex() to access and change the display
772     * order. For example, in a 2D dataset of 200x50 (dim0=200, dim1=50), the
773     * following code will set the height=200 and width=50.
774     * 
775     * <pre>
     * int[] selectedIndex = dataset.getSelectedIndex();
777     * selectedIndex[0] = 0;
778     * selectedIndex[1] = 1;
779     * </pre>
780     * 
781     * @see #getSelectedIndex()
782     * @see #getWidth()
783     * 
784     * @return the size of dimension of the vertical axis.
785     */
786    public final int getHeight() {
787        if (rank < 0) init();
788
789        if ((selectedDims == null) || (selectedIndex == null)) {
790            return 0;
791        }
792
793        return (int) selectedDims[selectedIndex[0]];
794    }
795
796    /**
797     * Returns the size of dimension of the horizontal axis.
798     * 
799     * <p>
     * This function is used by GUI applications such as HDFView. GUI
     * applications display a dataset in a 2D table or 2D image. The display
     * order is specified by the index array of selectedIndex as follows:
803     * <dl>
804     * <dt>selectedIndex[0] -- height</dt>
805     * <dd>The vertical axis</dd>
806     * <dt>selectedIndex[1] -- width</dt>
807     * <dd>The horizontal axis</dd>
808     * <dt>selectedIndex[2] -- depth</dt>
809     * <dd>The depth axis, which is used for 3 or more dimension datasets.</dd>
810     * </dl>
811     * Applications can use getSelectedIndex() to access and change the display
812     * order. For example, in a 2D dataset of 200x50 (dim0=200, dim1=50), the
     * following code will set the height=200 and width=50.
814     * 
815     * <pre>
     * int[] selectedIndex = dataset.getSelectedIndex();
817     * selectedIndex[0] = 0;
818     * selectedIndex[1] = 1;
819     * </pre>
820     * 
821     * @see #getSelectedIndex()
822     * @see #getHeight()
823     * 
824     * @return the size of dimension of the horizontal axis.
825     */
826    public final int getWidth() {
827        if (rank < 0) init();
828
829        if ((selectedDims == null) || (selectedIndex == null)) {
830            return 0;
831        }
832
833        if ((selectedDims.length < 2) || (selectedIndex.length < 2)) {
834            return 1;
835        }
836
837        return (int) selectedDims[selectedIndex[1]];
838    }
839
840    /**
841     * Returns the indices of display order.
842     * <p>
843     * 
     * selectedIndex[] is provided for two purposes:
845     * <OL>
846     * <LI>
847     * selectedIndex[] is used to indicate the order of dimensions for display.
848     * selectedIndex[0] is for the row, selectedIndex[1] is for the column and
849     * selectedIndex[2] for the depth.
850     * <p>
     * For example, for a four-dimension dataset, if selectedIndex[] = {1, 2, 3},
852     * then dim[1] is selected as row index, dim[2] is selected as column index
853     * and dim[3] is selected as depth index.
854     * <LI>
     * selectedIndex[] is also used to select dimensions for display for
     * datasets with three or more dimensions. We assume that applications such
     * as HDFView can only display data values up to three dimensions (a 2D
     * spreadsheet/image with a third dimension which the 2D spreadsheet/image
     * is selected from). For datasets with more than three dimensions, we need
     * selectedIndex[] to tell applications which three dimensions are chosen
     * for display. <br>
     * For example, for a four-dimension dataset, if selectedIndex[] = {1, 2, 3},
     * then dim[1] is selected as row index, dim[2] is selected as column index
     * and dim[3] is selected as depth index. dim[0] is not selected. Its
     * location is fixed at 0 by default.
866     * </OL>
867     * 
868     * @return the array of the indices of display order.
869     */
870    public final int[] getSelectedIndex() {
871        if (rank < 0) init();
872
873        return selectedIndex;
874    }
875
876    /**
877     * Returns the string representation of compression information.
878     * <p>
879     * For example,
880     * "SZIP: Pixels per block = 8: H5Z_FILTER_CONFIG_DECODE_ENABLED".
881     * 
882     * @return the string representation of compression information.
883     */
884    public final String getCompression() {
885        if (rank < 0) init();
886
887        return compression;
888    }
889
890    /**
891     * Returns the string representation of filter information.
892     * 
893     * @return the string representation of filter information.
894     */
895    public final String getFilters() {
896        if (rank < 0) init();
897
898        return filters;
899    }
900
901    /**
902     * Returns the string representation of storage information.
903     * 
904     * @return the string representation of storage information.
905     */
906    public final String getStorage() {
907        if (rank < 0) init();
908
909        return storage;
910    }
911
912    /**
913     * Returns the array that contains the dimension sizes of the chunk of the
914     * dataset. Returns null if the dataset is not chunked.
915     * 
916     * @return the array of chunk sizes or returns null if the dataset is not
917     *         chunked.
918     */
919    public final long[] getChunkSize() {
920        if (rank < 0) init();
921
922        return chunkSize;
923    }
924
925    /**
     * @deprecated Not for public use in the future. <br>
     *             Use {@link #convertFromUnsignedC(Object, Object)} instead.
928     */
929    @Deprecated
930    public static Object convertFromUnsignedC(Object data_in) {
931        return Dataset.convertFromUnsignedC(data_in, null);
932    }
933
934    /**
935     * Converts one-dimension array of unsigned C-type integers to a new array
936     * of appropriate Java integer in memory.
937     * <p>
     * Since Java does not support unsigned integers, values of unsigned C-type
     * integers must be converted into the appropriate Java integer type.
     * Otherwise, the data values will not be displayed correctly. For example,
     * if an unsigned C byte, x = 200, is stored into a Java byte y, y will be
     * -56 instead of the correct value of 200.
     * <p>
     * Unsigned C integers are upgraded to Java integers according to the
     * following table:
946     * <TABLE CELLSPACING=0 BORDER=1 CELLPADDING=5 WIDTH=400>
947     * <caption><b>Mapping Unsigned C Integers to Java Integers</b></caption>
948     * <TR>
949     * <TD><B>Unsigned C Integer</B></TD>
     * <TD><B>Java Integer</B></TD>
951     * </TR>
952     * <TR>
953     * <TD>unsigned byte</TD>
954     * <TD>signed short</TD>
955     * </TR>
956     * <TR>
957     * <TD>unsigned short</TD>
958     * <TD>signed int</TD>
959     * </TR>
960     * <TR>
961     * <TD>unsigned int</TD>
962     * <TD>signed long</TD>
963     * </TR>
964     * <TR>
965     * <TD>unsigned long</TD>
966     * <TD>signed long</TD>
967     * </TR>
968     * </TABLE>
969     * <strong>NOTE: this conversion cannot deal with unsigned 64-bit integers.
970     * Therefore, the values of unsigned 64-bit dataset may be wrong in Java
971     * application</strong>.
972     * <p>
973     * If memory data of unsigned integers is converted by
974     * convertFromUnsignedC(), convertToUnsignedC() must be called to convert
975     * the data back to unsigned C before data is written into file.
976     * 
977     * @see #convertToUnsignedC(Object, Object)
978     * 
979     * @param data_in
980     *            the input 1D array of the unsigned C-type integers.
981     * @param data_out
982     *            the output converted (or upgraded) 1D array of Java integers.
983     * 
984     * @return the upgraded 1D array of Java integers.
985     */
986    public static Object convertFromUnsignedC(Object data_in, Object data_out) {
987        if (data_in == null) {
988            return null;
989        }
990
991        Class data_class = data_in.getClass();
992        if (!data_class.isArray()) {
993            return null;
994        }
995
996        if (data_out != null) {
997            Class data_class_out = data_out.getClass();
998            if (!data_class_out.isArray() || (Array.getLength(data_in) != Array.getLength(data_out))) {
999                data_out = null;
1000            }
1001        }
1002
1003        String cname = data_class.getName();
1004        char dname = cname.charAt(cname.lastIndexOf("[") + 1);
1005        int size = Array.getLength(data_in);
1006        log.trace("convertFromUnsignedC: cname={} dname={} size={}", cname, dname, size);
1007
1008        if (dname == 'B') {
1009            short[] sdata = null;
1010            if (data_out == null) {
1011                sdata = new short[size];
1012            }
1013            else {
1014                sdata = (short[]) data_out;
1015            }
1016
1017            byte[] bdata = (byte[]) data_in;
1018            for (int i = 0; i < size; i++) {
1019                sdata[i] = (short) ((bdata[i] + 256) & 0xFF);
1020            }
1021
1022            data_out = sdata;
1023        }
1024        else if (dname == 'S') {
1025            int[] idata = null;
1026            if (data_out == null) {
1027                idata = new int[size];
1028            }
1029            else {
1030                idata = (int[]) data_out;
1031            }
1032
1033            short[] sdata = (short[]) data_in;
1034            for (int i = 0; i < size; i++) {
1035                idata[i] = (sdata[i] + 65536) & 0xFFFF;
1036            }
1037
1038            data_out = idata;
1039        }
1040        else if (dname == 'I') {
1041            long[] ldata = null;
1042            if (data_out == null) {
1043                ldata = new long[size];
1044            }
1045            else {
1046                ldata = (long[]) data_out;
1047            }
1048
1049            int[] idata = (int[]) data_in;
1050            for (int i = 0; i < size; i++) {
1051                ldata[i] = (idata[i] + 4294967296L) & 0xFFFFFFFFL;
1052            }
1053
1054            data_out = ldata;
1055        }
1056        else {
1057            data_out = data_in;
1058            log.debug("convertFromUnsignedC: Java does not support unsigned long");
1059        }
1060
1061        return data_out;
1062    }
1063
1064    /**
     * @deprecated Not for public use in the future. <br>
     *             Use {@link #convertToUnsignedC(Object, Object)} instead.
1067     */
1068    @Deprecated
1069    public static Object convertToUnsignedC(Object data_in) {
1070        return Dataset.convertToUnsignedC(data_in, null);
1071    }
1072
1073    /**
1074     * Converts the array of converted unsigned integer back to unsigned C-type
1075     * integer data in memory.
1076     * <p>
1077     * If memory data of unsigned integers is converted by
1078     * convertFromUnsignedC(), convertToUnsignedC() must be called to convert
1079     * the data back to unsigned C before data is written into file.
1080     * 
1081     * @see #convertFromUnsignedC(Object, Object)
1082     * 
1083     * @param data_in
1084     *            the input array of the Java integer.
1085     * @param data_out
1086     *            the output array of the unsigned C-type integer.
1087     * 
1088     * @return the converted data of unsigned C-type integer array.
1089     */
1090    public static Object convertToUnsignedC(Object data_in, Object data_out) {
1091        if (data_in == null) {
1092            return null;
1093        }
1094
1095        Class data_class = data_in.getClass();
1096        if (!data_class.isArray()) {
1097            return null;
1098        }
1099
1100        if (data_out != null) {
1101            Class data_class_out = data_out.getClass();
1102            if (!data_class_out.isArray() || (Array.getLength(data_in) != Array.getLength(data_out))) {
1103                data_out = null;
1104            }
1105        }
1106
1107        String cname = data_class.getName();
1108        char dname = cname.charAt(cname.lastIndexOf("[") + 1);
1109        int size = Array.getLength(data_in);
1110        log.trace("convertToUnsignedC: cname={} dname={} size={}", cname, dname, size);
1111
1112        if (dname == 'S') {
1113            byte[] bdata = null;
1114            if (data_out == null) {
1115                bdata = new byte[size];
1116            }
1117            else {
1118                bdata = (byte[]) data_out;
1119            }
1120            short[] sdata = (short[]) data_in;
1121            for (int i = 0; i < size; i++) {
1122                bdata[i] = (byte) sdata[i];
1123            }
1124            data_out = bdata;
1125        }
1126        else if (dname == 'I') {
1127            short[] sdata = null;
1128            if (data_out == null) {
1129                sdata = new short[size];
1130            }
1131            else {
1132                sdata = (short[]) data_out;
1133            }
1134            int[] idata = (int[]) data_in;
1135            for (int i = 0; i < size; i++) {
1136                sdata[i] = (short) idata[i];
1137            }
1138            data_out = sdata;
1139        }
1140        else if (dname == 'J') {
1141            int[] idata = null;
1142            if (data_out == null) {
1143                idata = new int[size];
1144            }
1145            else {
1146                idata = (int[]) data_out;
1147            }
1148            long[] ldata = (long[]) data_in;
1149            for (int i = 0; i < size; i++) {
1150                idata[i] = (int) ldata[i];
1151            }
1152            data_out = idata;
1153        }
1154        else {
1155            data_out = data_in;
1156            log.debug("convertToUnsignedC: Java does not support unsigned long");
1157        }
1158
1159        return data_out;
1160    }
1161
1162    /**
1163     * Converts an array of bytes into an array of Strings for a fixed string
1164     * dataset.
1165     * <p>
     * A C-string is an array of chars while a Java String is an object. When a
     * string dataset is read into a Java application, the data is stored in an
     * array of Java bytes. byteToString() is used to convert the array of bytes
     * into an array of Java strings so that applications can display and modify
     * the data content.
     * <p>
     * For example, the content of a two element C string dataset is {"ABC",
     * "abc"}. Java applications will read the data into a byte array of {65,
     * 66, 67, 97, 98, 99}. byteToString(bytes, 3) returns an array of Java
     * Strings with strs[0]="ABC" and strs[1]="abc".
1176     * <p>
1177     * If memory data of strings is converted to Java Strings, stringToByte()
1178     * must be called to convert the memory data back to byte array before data
1179     * is written to file.
1180     * 
1181     * @see #stringToByte(String[], int)
1182     * 
1183     * @param bytes
1184     *            the array of bytes to convert.
1185     * @param length
1186     *            the length of string.
1187     * 
1188     * @return the array of Java String.
1189     */
1190    public static final String[] byteToString(byte[] bytes, int length) {
1191        if (bytes == null) {
1192            return null;
1193        }
1194
1195        int n = bytes.length / length;
1196        log.trace("byteToString: n={} from length of {}", n, length);
1197        // String bigstr = new String(bytes);
1198        String[] strArray = new String[n];
1199        String str = null;
1200        int idx = 0;
1201        for (int i = 0; i < n; i++) {
1202            str = new String(bytes, i * length, length);
1203            // bigstr.substring uses less memory space
1204            // NOTE: bigstr does not work on linux if bytes.length is very large
1205            // see bug 1091
1206            // offset = i*length;
1207            // str = bigstr.substring(offset, offset+length);
1208
1209            idx = str.indexOf('\0');
1210            if (idx > 0) {
1211                str = str.substring(0, idx);
1212            }
1213
1214            // trim only the end
1215            int end = str.length();
1216            while (end > 0 && str.charAt(end - 1) <= '\u0020')
1217                end--;
1218
1219            strArray[i] = (end <= 0) ? "" : str.substring(0, end);
1220
1221            // trim both start and end
1222            // strArray[i] = str.trim();
1223        }
1224
1225        return strArray;
1226    }
1227
1228    /**
1229     * Converts a string array into an array of bytes for a fixed string
1230     * dataset.
1231     * <p>
1232     * If memory data of strings is converted to Java Strings, stringToByte()
1233     * must be called to convert the memory data back to byte array before data
1234     * is written to file.
1235     * 
1236     * @see #byteToString(byte[] bytes, int length)
1237     * 
1238     * @param strings
1239     *            the array of string.
1240     * @param length
1241     *            the length of string.
1242     * 
1243     * @return the array of bytes.
1244     */
1245    public static final byte[] stringToByte(String[] strings, int length) {
1246        if (strings == null) {
1247            return null;
1248        }
1249
1250        int size = strings.length;
1251        byte[] bytes = new byte[size * length];
1252        log.trace("stringToByte: size={} length={}", size, length);
1253        StringBuffer strBuff = new StringBuffer(length);
1254        for (int i = 0; i < size; i++) {
1255            // initialize the string with spaces
1256            strBuff.replace(0, length, " ");
1257
1258            if (strings[i] != null) {
1259                if (strings[i].length() > length) {
1260                    strings[i] = strings[i].substring(0, length);
1261                }
1262                strBuff.replace(0, length, strings[i]);
1263            }
1264
1265            strBuff.setLength(length);
1266            System.arraycopy(strBuff.toString().getBytes(), 0, bytes, length * i, length);
1267        }
1268
1269        return bytes;
1270    }
1271
1272    /**
1273     * Returns the array of strings that represent the dimension names. Returns
1274     * null if there is no dimension name.
1275     * <p>
1276     * Some datasets have pre-defined names for each dimension such as
1277     * "Latitude" and "Longitude". getDimNames() returns these pre-defined
1278     * names.
1279     * 
1280     * @return the names of dimensions, or null if there is no dimension name.
1281     */
1282    public final String[] getDimNames() {
1283        if (rank < 0) init();
1284
1285        return dimNames;
1286    }
1287
1288    /**
1289     * Checks if a given datatype is a string. Sub-classes must replace this
1290     * default implementation.
1291     * 
1292     * @param tid
1293     *            The data type identifier.
1294     * 
1295     * @return true if the datatype is a string; otherwise returns false.
1296     */
1297    public boolean isString(int tid) {
1298        return false;
1299    }
1300
1301    /**
1302     * Returns the size in bytes of a given datatype. Sub-classes must replace
1303     * this default implementation.
1304     * 
1305     * @param tid
1306     *            The data type identifier.
1307     * 
1308     * @return The size of the datatype
1309     */
1310    public int getSize(int tid) {
1311        return -1;
1312    }
1313
1314    /**
     * Gets the flag that indicates if enum data is converted to strings.
1316     * 
1317     * @return the enumConverted
1318     */
1319    public boolean isEnumConverted() {
1320        return enumConverted;
1321    }
1322
1323    /**
     * Sets the flag that indicates if enum data is converted to strings.
1325     * 
1326     * @param b
1327     *            the enumConverted to set
1328     */
1329    public void setEnumConverted(boolean b) {
1330        if (enumConverted != b) {
1331            originalBuf = convertedBuf = null;
1332            this.clearData();
1333        }
1334
1335        enumConverted = b;
1336    }
1337
1338    /**
1339     * Get Class of the original data buffer if converted.
1340     * 
1341     * @return the Class of originalBuf
1342     */
1343    public final Class getOriginalClass() {
1344        return originalBuf.getClass();
1345    }
1346}