001/***************************************************************************** 002 * Copyright by The HDF Group. * 003 * Copyright by the Board of Trustees of the University of Illinois. * 004 * All rights reserved. * 005 * * 006 * This file is part of the HDF Java Products distribution. * 007 * The full copyright notice, including terms governing use, modification, * 008 * and redistribution, is contained in the files COPYING and Copyright.html. * 009 * COPYING can be found at the root of the source code distribution tree. * 010 * Or, see http://hdfgroup.org/products/hdf-java/doc/Copyright.html. * 011 * If you do not have access to either file, you may request a copy from * 012 * help@hdfgroup.org. * 013 ****************************************************************************/ 014 015package hdf.object; 016 017import java.lang.reflect.Array; 018import java.util.Vector; 019 020/** 021 * The abstract class provides general APIs to create and manipulate dataset 022 * objects, and retrieve dataset properties datatype and dimension sizes. 023 * <p> 024 * This class provides two convenient functions, read()/write(), to read/write 025 * data values. Reading/writing data may take many library calls if we use the 026 * library APIs directly. The read() and write functions hide all the details of 027 * these calls from users. 028 * 029 * @see hdf.object.ScalarDS 030 * @see hdf.object.CompoundDS 031 * 032 * @version 1.1 9/4/2007 033 * @author Peter X. Cao 034 */ 035public abstract class Dataset extends HObject { 036 /** 037 * 038 */ 039 private static final long serialVersionUID = -3360885430038261178L; 040 041 private final static org.slf4j.Logger log = org.slf4j.LoggerFactory.getLogger(Dataset.class); 042 043 /** 044 * The memory buffer that holds the raw data of the dataset. 045 */ 046 protected Object data; 047 048 /** 049 * The number of dimensions of the dataset. 
050 */ 051 protected int rank; 052 053 /** 054 * The current dimension sizes of the dataset 055 */ 056 protected long[] dims; 057 058 /** 059 * The max dimension sizes of the dataset 060 */ 061 protected long[] maxDims; 062 063 /** 064 * Array that contains the number of data points selected (for read/write) 065 * in each dimension. 066 * <p> 067 * The select size must be less than or equal to the current dimension size. 068 * A subset of a rectangle selection is defined by the starting position and 069 * selected sizes. 070 * <p> 071 * For example, a 4 X 5 dataset 072 * 073 * <pre> 074 * 0, 1, 2, 3, 4 075 * 10, 11, 12, 13, 14 076 * 20, 21, 22, 23, 24 077 * 30, 31, 32, 33, 34 078 * long[] dims = {4, 5}; 079 * long[] startDims = {1, 2}; 080 * long[] selectedDims = {3, 3}; 081 * then the following subset is selected by the startDims and selectedDims above 082 * 12, 13, 14 083 * 22, 23, 24 084 * 32, 33, 34 085 */ 086 protected long[] selectedDims; 087 088 /** 089 * The starting position of each dimension of a selected subset. With both 090 * the starting position and selected sizes, the subset of a rectangle 091 * selection is fully defined. 092 */ 093 protected long[] startDims; 094 095 /** 096 * Array that contains the indices of the dimensions selected for display. 097 * <p> 098 * <B>selectedIndex[] is provided for two purpose:</B> 099 * <OL> 100 * <LI> 101 * selectedIndex[] is used to indicate the order of dimensions for display, 102 * i.e. selectedIndex[0] = row, selectedIndex[1] = column and 103 * selectedIndex[2] = depth. For example, for a four dimension dataset, if 104 * selectedIndex[] is {1, 2, 3}, then dim[1] is selected as row index, 105 * dim[2] is selected as column index and dim[3] is selected as depth index. 106 * <LI> 107 * selectedIndex[] is also used to select dimensions for display for 108 * datasets with three or more dimensions. 
We assume that applications such 109 * as HDFView can only display data up to three dimensions (a 2D 110 * spreadsheet/image with a third dimension that the 2D spreadsheet/image is 111 * cut from). For dataset with more than three dimensions, we need 112 * selectedIndex[] to store which three dimensions are chosen for display. 113 * For example, for a four dimension dataset, if selectedIndex[] = {1, 2, 3}, 114 * then dim[1] is selected as row index, dim[2] is selected as column index 115 * and dim[3] is selected as depth index. dim[0] is not selected. Its 116 * location is fixed at 0 by default. 117 * </OL> 118 */ 119 protected final int[] selectedIndex; 120 121 /** 122 * The number of elements to move from the start location in each dimension. 123 * For example, if selectedStride[0] = 2, every other data point is selected 124 * along dim[0]. 125 */ 126 protected long[] selectedStride; 127 128 /** 129 * The array of dimension sizes for a chunk. 130 */ 131 protected long[] chunkSize; 132 133 /** The compression information. */ 134 protected String compression; 135 public final static String compression_gzip_txt = "GZIP: level = "; 136 137 /** The filters information. */ 138 protected String filters; 139 140 /** The storage information. */ 141 protected String storage; 142 143 /** The datatype object of the dataset. */ 144 protected Datatype datatype; 145 146 /** 147 * Array of strings that represent the dimension names. It is null if 148 * dimension names do not exist. 149 */ 150 protected String[] dimNames; 151 152 /** Flag to indicate if the byte[] array is converted to strings */ 153 protected boolean convertByteToString = true; 154 155 /** Flag to indicate if data values are loaded into memory. */ 156 protected boolean isDataLoaded = false; 157 158 /** The number of data points in the memory buffer. */ 159 protected long nPoints = 1; 160 161 /** 162 * The data buffer that contains the raw data directly reading from file 163 * (before any data conversion). 
164 */ 165 protected Object originalBuf = null; 166 167 /** 168 * The array that holds the converted data of unsigned C-type integers. 169 * <p> 170 * For example, Suppose that the original data is an array of unsigned 171 * 16-bit short integers. Since Java does not support unsigned integer, the 172 * data is converted to an array of 32-bit singed integer. In that case, the 173 * converted buffer is the array of 32-bit singed integer. 174 */ 175 protected Object convertedBuf = null; 176 177 /** 178 * Flag to indicate if the enum data is converted to strings. 179 */ 180 protected boolean enumConverted = false; 181 182 /** 183 * Constructs a Dataset object with a given file, name and path. 184 * <p> 185 * 186 * @param theFile 187 * the file that contains the dataset. 188 * @param name 189 * the name of the Dataset, e.g. "dset1". 190 * @param path 191 * the full group path of this Dataset, e.g. "/arrays/". 192 */ 193 public Dataset(FileFormat theFile, String name, String path) { 194 this(theFile, name, path, null); 195 } 196 197 /** 198 * @deprecated Not for public use in the future. <br> 199 * Using {@link #Dataset(FileFormat, String, String)} 200 */ 201 @Deprecated 202 public Dataset(FileFormat theFile, String name, String path, long[] oid) { 203 super(theFile, name, path, oid); 204 205 rank = 0; 206 data = null; 207 dims = null; 208 maxDims = null; 209 selectedDims = null; 210 startDims = null; 211 selectedStride = null; 212 chunkSize = null; 213 compression = "NONE"; 214 filters = "NONE"; 215 storage = "NONE"; 216 dimNames = null; 217 218 selectedIndex = new int[3]; 219 selectedIndex[0] = 0; 220 selectedIndex[1] = 1; 221 selectedIndex[2] = 2; 222 } 223 224 /** 225 * Clears memory held by the dataset, such as data buffer. 
226 */ 227 public void clear() { 228 if (data != null) { 229 if (data instanceof Vector) { 230 ((Vector) data).setSize(0); 231 } 232 data = null; 233 originalBuf = null; 234 convertedBuf = null; 235 } 236 isDataLoaded = false; 237 } 238 239 /** 240 * Retrieves datatype and dataspace information from file and sets the 241 * dataset in memory. 242 * <p> 243 * The init() is designed to support lazy operation in dataset object. When 244 * a data object is retrieved from file, the datatype, dataspace and raw 245 * data are not loaded into memory. When it is asked to read the raw data 246 * from file, init() is first called to get the datatype and dataspace 247 * information, then load the raw data from file. 248 * <p> 249 * init() is also used to reset selection of a dataset (start, stride and 250 * count) to the default, which is the entire dataset for 1D or 2D datasets. 251 * In the following example, init() at step 1) retrieve datatype and 252 * dataspace information from file. getData() at step 3) read only one data 253 * point. init() at step 4) reset the selection to the whole dataset. 254 * getData() at step 4) reads the values of whole dataset into memory. 
255 * 256 * <pre> 257 * dset = (Dataset) file.get(NAME_DATASET); 258 * 259 * // 1) get datatype and dataspace information from file 260 * dset.init(); 261 * rank = dset.getRank(); // rank = 2, a 2D dataset 262 * count = dset.getSelectedDims(); 263 * start = dset.getStartDims(); 264 * dims = dset.getDims(); 265 * 266 * // 2) select only one data point 267 * for (int i = 0; i < rank; i++) { 268 * start[0] = 0; 269 * count[i] = 1; 270 * } 271 * 272 * // 3) read one data point 273 * data = dset.getData(); 274 * 275 * // 4) reset to select the whole dataset 276 * dset.init(); 277 * 278 * // 5) clean the memory data buffer 279 * dset.clearData(); 280 * 281 * // 6) Read the whole dataset 282 * data = dset.getData(); 283 * </pre> 284 */ 285 public abstract void init(); 286 287 /** 288 * Returns the rank (number of dimensions) of the dataset. 289 * 290 * @return the number of dimensions of the dataset. 291 */ 292 public final int getRank() { 293 if (rank < 0) init(); 294 295 return rank; 296 } 297 298 /** 299 * Returns the array that contains the dimension sizes of the dataset. 300 * 301 * @return the dimension sizes of the dataset. 302 */ 303 public final long[] getDims() { 304 if (rank < 0) init(); 305 306 return dims; 307 } 308 309 /** 310 * Returns the array that contains the max dimension sizes of the dataset. 311 * 312 * @return the max dimension sizes of the dataset. 313 */ 314 public final long[] getMaxDims() { 315 if (rank < 0) init(); 316 317 if (maxDims == null) return dims; 318 319 return maxDims; 320 } 321 322 /** 323 * Returns the dimension sizes of the selected subset. 324 * <p> 325 * The SelectedDims is the number of data points of the selected subset. 326 * Applications can use this array to change the size of selected subset. 327 * 328 * The select size must be less than or equal to the current dimension size. 329 * Combined with the starting position, selected sizes and stride, the 330 * subset of a rectangle selection is fully defined. 
331 * <p> 332 * For example, a 4 X 5 dataset 333 * 334 * <pre> 335 * 0, 1, 2, 3, 4 336 * 10, 11, 12, 13, 14 337 * 20, 21, 22, 23, 24 338 * 30, 31, 32, 33, 34 339 * long[] dims = {4, 5}; 340 * long[] startDims = {1, 2}; 341 * long[] selectedDims = {3, 3}; 342 * long[] selectedStride = {1, 1}; 343 * then the following subset is selected by the startDims and selectedDims 344 * 12, 13, 14 345 * 22, 23, 24 346 * 32, 33, 34 347 * </pre> 348 * 349 * @return the dimension sizes of the selected subset. 350 */ 351 public final long[] getSelectedDims() { 352 if (rank < 0) init(); 353 354 return selectedDims; 355 } 356 357 /** 358 * Returns the starting position of a selected subset. 359 * <p> 360 * Applications can use this array to change the starting position of a 361 * selection. Combined with the selected dimensions, selected sizes and 362 * stride, the subset of a rectangle selection is fully defined. 363 * <p> 364 * For example, a 4 X 5 dataset 365 * 366 * <pre> 367 * 0, 1, 2, 3, 4 368 * 10, 11, 12, 13, 14 369 * 20, 21, 22, 23, 24 370 * 30, 31, 32, 33, 34 371 * long[] dims = {4, 5}; 372 * long[] startDims = {1, 2}; 373 * long[] selectedDims = {3, 3}; 374 * long[] selectedStride = {1, 1}; 375 * then the following subset is selected by the startDims and selectedDims 376 * 12, 13, 14 377 * 22, 23, 24 378 * 32, 33, 34 379 * </pre> 380 * 381 * @return the starting position of a selected subset. 382 */ 383 public final long[] getStartDims() { 384 if (rank < 0) init(); 385 386 return startDims; 387 } 388 389 /** 390 * Returns the selectedStride of the selected dataset. 391 * <p> 392 * Applications can use this array to change how many elements to move in 393 * each dimension. 394 * 395 * Combined with the starting position and selected sizes, the subset of a 396 * rectangle selection is defined. 
397 * <p> 398 * For example, a 4 X 5 dataset 399 * 400 * <pre> 401 * 0, 1, 2, 3, 4 402 * 10, 11, 12, 13, 14 403 * 20, 21, 22, 23, 24 404 * 30, 31, 32, 33, 34 405 * long[] dims = {4, 5}; 406 * long[] startDims = {0, 0}; 407 * long[] selectedDims = {2, 2}; 408 * long[] selectedStride = {2, 3}; 409 * then the following subset is selected by the startDims and selectedDims 410 * 0, 3 411 * 20, 23 412 * </pre> 413 */ 414 public final long[] getStride() { 415 if (rank < 0) init(); 416 417 if (rank <= 0) { 418 return null; 419 } 420 421 if (selectedStride == null) { 422 selectedStride = new long[rank]; 423 for (int i = 0; i < rank; i++) { 424 selectedStride[i] = 1; 425 } 426 } 427 428 return selectedStride; 429 } 430 431 /** 432 * Sets the flag that indicates if a byte array is converted to a string 433 * array. 434 * <p> 435 * In a string dataset, the raw data from file is stored in a byte array. By 436 * default, this byte array is converted to an array of strings. For a large 437 * dataset (e.g. more than one million strings), the converson takes a long 438 * time and requires a lot of memory space to store the strings. At some 439 * applications, such a conversion can be delayed. For example, A GUI 440 * application may convert only part of the strings that are visible to the 441 * users, not the entire data array. 442 * <p> 443 * setConvertByteToString(boolean b) allows users to set the flag so that 444 * applications can choose to perform the byte-to-string conversion or not. 445 * If the flag is set to false, the getData() returns a array of byte 446 * instead of an array of strings. 447 * 448 * @param b 449 * convert bytes to strings if b is true; otherwise, if false, do 450 * not convert bytes to strings. 451 */ 452 public final void setConvertByteToString(boolean b) { 453 convertByteToString = b; 454 } 455 456 /** 457 * Returns the flag that indicates if a byte array is converted to a string 458 * array.. 
459 * 460 * @return true if byte array is converted to string; otherwise, returns 461 * false if there is no conversion. 462 */ 463 public final boolean getConvertByteToString() { 464 return convertByteToString; 465 } 466 467 /** 468 * Reads the data from file. 469 * <p> 470 * read() reads the data from file to a memory buffer and returns the memory 471 * buffer. The dataset object does not hold the memory buffer. To store the 472 * memory buffer in the dataset object, one must call getData(). 473 * <p> 474 * By default, the whole dataset is read into memory. Users can also select 475 * subset to read. Subsetting is done in an implicit way. 476 * <p> 477 * <b>How to Select a Subset</b> 478 * <p> 479 * A selection is specified by three arrays: start, stride and count. 480 * <ol> 481 * <li>start: offset of a selection 482 * <li>stride: determining how many elements to move in each dimension 483 * <li>count: number of elements to select in each dimension 484 * </ol> 485 * getStartDims(), getStartDims() and getSelectedDims() returns the start, 486 * stride and count arrays respectively. Applications can make a selection 487 * by changing the values of the arrays. 488 * <p> 489 * The following example shows how to make a subset. In the example, the 490 * dataset is a 4-dimensional array of [200][100][50][10], i.e. 
dims[0]=200;
     * dims[1]=100; dims[2]=50; dims[3]=10; <br>
     * We want to select every other data point in dims[1] and dims[2]
     *
     * <pre>
     * int rank = dataset.getRank(); // number of dimensions of the dataset
     * long[] dims = dataset.getDims(); // the dimension sizes of the dataset
     * long[] selected = dataset.getSelectedDims(); // the selected size of the dataset
     * long[] start = dataset.getStartDims(); // the offset of the selection
     * long[] stride = dataset.getStride(); // the stride of the dataset
     * int[] selectedIndex = dataset.getSelectedIndex(); // the selected dimensions for
     *                                                   // display
     *
     * // select dim1 and dim2 as 2D data for display, and slice through dim0
     * selectedIndex[0] = 1;
     * selectedIndex[1] = 2;
     * selectedIndex[2] = 0;
     *
     * // reset the selection arrays
     * for (int i = 0; i &lt; rank; i++) {
     *     start[i] = 0;
     *     selected[i] = 1;
     *     stride[i] = 1;
     * }
     *
     * // set stride to 2 on dim1 and dim2 so that every other data point is
     * // selected.
     * stride[1] = 2;
     * stride[2] = 2;
     *
     * // set the selection size of dim1 and dim2
     * selected[1] = dims[1] / stride[1];
     * selected[2] = dims[2] / stride[2];
     *
     * // when dataset.getData() is called, the selection above will be used since
     * // the dimension arrays are passed by reference. Changes of these arrays
     * // outside the dataset object directly change the values of these arrays
     * // in the dataset object.
     * </pre>
     * <p>
     * For ScalarDS, the memory data buffer is a one-dimensional array of byte,
     * short, int, float, double or String type based on the datatype of the
     * dataset.
     * <p>
     * For CompoundDS, the memory data object is a java.util.List object. Each
     * element of the list is a data array that corresponds to a compound field.
536 * <p> 537 * For example, if compound dataset "comp" has the following nested 538 * structure, and member datatypes 539 * 540 * <pre> 541 * comp --> m01 (int) 542 * comp --> m02 (float) 543 * comp --> nest1 --> m11 (char) 544 * comp --> nest1 --> m12 (String) 545 * comp --> nest1 --> nest2 --> m21 (long) 546 * comp --> nest1 --> nest2 --> m22 (double) 547 * </pre> 548 * 549 * getData() returns a list of six arrays: {int[], float[], char[], 550 * String[], long[] and double[]}. 551 * 552 * @return the data read from file. 553 * 554 * @see #getData() 555 */ 556 public abstract Object read() throws Exception, OutOfMemoryError; 557 558 /** 559 * Reads the raw data of the dataset from file to a byte array. 560 * <p> 561 * readBytes() reads raw data to an array of bytes instead of array of its 562 * datatype. For example, for an one-dimension 32-bit integer dataset of 563 * size 5, the readBytes() returns of a byte array of size 20 instead of an 564 * int array of 5. 565 * <p> 566 * readBytes() can be used to copy data from one dataset to another 567 * efficiently because the raw data is not converted to its native type, it 568 * saves memory space and CPU time. 569 * 570 * @return the byte array of the raw data. 571 */ 572 public abstract byte[] readBytes() throws Exception; 573 574 /** 575 * Writes a memory buffer to the dataset in file. 576 * 577 * @param buf 578 * the data to write 579 */ 580 public abstract void write(Object buf) throws Exception; 581 582 /** 583 * Writes the memory buffer of this dataset to file. 584 */ 585 public final void write() throws Exception { 586 if (data != null) { 587 write(data); 588 } 589 } 590 591 /** 592 * Creates a new dataset and writes the data buffer to the new dataset. 593 * <p> 594 * This function allows applications to create a new dataset for a given 595 * data buffer. For example, users can select a specific interesting part 596 * from a large image and create a new image with the selection. 
597 * <p> 598 * The new dataset retains the datatype and dataset creation properties of 599 * this dataset. 600 * 601 * @param pgroup 602 * the group which the dataset is copied to. 603 * @param name 604 * the name of the new dataset. 605 * @param dims 606 * the dimension sizes of the the new dataset. 607 * @param data 608 * the data values of the subset to be copied. 609 * 610 * @return the new dataset. 611 */ 612 public abstract Dataset copy(Group pgroup, String name, long[] dims, Object data) throws Exception; 613 614 /** 615 * Returns the datatype object of the dataset. 616 * 617 * @return the datatype object of the dataset. 618 */ 619 public abstract Datatype getDatatype(); 620 621 /** 622 * Returns the data buffer of the dataset in memory. 623 * <p> 624 * If data is already loaded into memory, returns the data; otherwise, calls 625 * read() to read data from file into a memory buffer and returns the memory 626 * buffer. 627 * <p> 628 * <p> 629 * By default, the whole dataset is read into memory. Users can also select 630 * subset to read. Subsetting is done in an implicit way. 631 * <p> 632 * <b>How to Select a Subset</b> 633 * <p> 634 * A selection is specified by three arrays: start, stride and count. 635 * <ol> 636 * <li>start: offset of a selection 637 * <li>stride: determining how many elements to move in each dimension 638 * <li>count: number of elements to select in each dimension 639 * </ol> 640 * getStartDims(), getStartDims() and getSelectedDims() returns the start, 641 * stride and count arrays respectively. Applications can make a selection 642 * by changing the values of the arrays. 643 * <p> 644 * The following example shows how to make a subset. In the example, the 645 * dataset is a 4-dimensional array of [200][100][50][10], i.e. 
dims[0]=200;
     * dims[1]=100; dims[2]=50; dims[3]=10; <br>
     * We want to select every other data point in dims[1] and dims[2]
     *
     * <pre>
     * int rank = dataset.getRank(); // number of dimensions of the dataset
     * long[] dims = dataset.getDims(); // the dimension sizes of the dataset
     * long[] selected = dataset.getSelectedDims(); // the selected size of the dataset
     * long[] start = dataset.getStartDims(); // the offset of the selection
     * long[] stride = dataset.getStride(); // the stride of the dataset
     * int[] selectedIndex = dataset.getSelectedIndex(); // the selected dimensions for
     *                                                   // display
     *
     * // select dim1 and dim2 as 2D data for display, and slice through dim0
     * selectedIndex[0] = 1;
     * selectedIndex[1] = 2;
     * selectedIndex[2] = 0;
     *
     * // reset the selection arrays
     * for (int i = 0; i &lt; rank; i++) {
     *     start[i] = 0;
     *     selected[i] = 1;
     *     stride[i] = 1;
     * }
     *
     * // set stride to 2 on dim1 and dim2 so that every other data point is
     * // selected.
     * stride[1] = 2;
     * stride[2] = 2;
     *
     * // set the selection size of dim1 and dim2
     * selected[1] = dims[1] / stride[1];
     * selected[2] = dims[2] / stride[2];
     *
     * // when dataset.getData() is called, the selection above will be used since
     * // the dimension arrays are passed by reference. Changes of these arrays
     * // outside the dataset object directly change the values of these arrays
     * // in the dataset object.
     * </pre>
     * <p>
     * For ScalarDS, the memory data buffer is a one-dimensional array of byte,
     * short, int, float, double or String type based on the datatype of the
     * dataset.
     * <p>
     * For CompoundDS, the memory data object is a java.util.List object. Each
     * element of the list is a data array that corresponds to a compound field.
691 * <p> 692 * For example, if compound dataset "comp" has the following nested 693 * structure, and memeber datatypes 694 * 695 * <pre> 696 * comp --> m01 (int) 697 * comp --> m02 (float) 698 * comp --> nest1 --> m11 (char) 699 * comp --> nest1 --> m12 (String) 700 * comp --> nest1 --> nest2 --> m21 (long) 701 * comp --> nest1 --> nest2 --> m22 (double) 702 * </pre> 703 * 704 * getData() returns a list of six arrays: {int[], float[], char[], 705 * String[], long[] and double[]}. 706 * 707 * @return the memory buffer of the dataset. 708 */ 709 public final Object getData() throws Exception, OutOfMemoryError { 710 if (!isDataLoaded) { 711 log.trace("getData: read"); 712 data = read(); // load the data; 713 originalBuf = data; 714 isDataLoaded = true; 715 nPoints = 1; 716 for (int j = 0; j < selectedDims.length; j++) { 717 nPoints *= selectedDims[j]; 718 } 719 log.trace("getData: read {}", nPoints); 720 } 721 722 return data; 723 } 724 725 /** 726 * @deprecated Not for public use in the future. 727 * <p> 728 * setData() is not safe to use because it changes memory buffer 729 * of the dataset object. Dataset operation such as write/read 730 * will fail if the buffer type or size is changed. 731 */ 732 @Deprecated 733 public final void setData(Object d) { 734 data = d; 735 } 736 737 /** 738 * Clears the data buffer in memory and to force the next read() to load 739 * data from file. 740 * <p> 741 * The function read() loads data from file into memory only if the data is 742 * not read. If data is already in memory, read() just returns the memory 743 * buffer. Sometimes we want to force read() to re-read data from file. For 744 * example, when the selection is changed, we need to re-read the data. 745 * 746 * clearData() clears the current memory buffer and force the read() to load 747 * the data from file. 
748 * 749 * @see #getData() 750 * @see #read() 751 */ 752 public void clearData() { 753 isDataLoaded = false; 754 } 755 756 /** 757 * Returns the dimension size of the vertical axis. 758 * 759 * <p> 760 * This function is used by GUI applications such as HDFView. GUI 761 * applications display a dataset in a 2D table or 2D image. The display 762 * order is specified by the index array of selectedIndex as follow: 763 * <dl> 764 * <dt>selectedIndex[0] -- height</dt> 765 * <dd>The vertical axis</dd> 766 * <dt>selectedIndex[1] -- width</dt> 767 * <dd>The horizontal axis</dd> 768 * <dt>selectedIndex[2] -- depth</dt> 769 * <dd>The depth axis is used for 3 or more dimensional datasets.</dd> 770 * </dl> 771 * Applications can use getSelectedIndex() to access and change the display 772 * order. For example, in a 2D dataset of 200x50 (dim0=200, dim1=50), the 773 * following code will set the height=200 and width=50. 774 * 775 * <pre> 776 * long[] selectedIndex = dataset.getSelectedIndex(); 777 * selectedIndex[0] = 0; 778 * selectedIndex[1] = 1; 779 * </pre> 780 * 781 * @see #getSelectedIndex() 782 * @see #getWidth() 783 * 784 * @return the size of dimension of the vertical axis. 785 */ 786 public final int getHeight() { 787 if (rank < 0) init(); 788 789 if ((selectedDims == null) || (selectedIndex == null)) { 790 return 0; 791 } 792 793 return (int) selectedDims[selectedIndex[0]]; 794 } 795 796 /** 797 * Returns the size of dimension of the horizontal axis. 798 * 799 * <p> 800 * This function is used by GUI applications such as HDFView. GUI 801 * applications display dataset a 2D Table or 2D Image. 
The display order is 802 * specified by the index array of selectedIndex as follow: 803 * <dl> 804 * <dt>selectedIndex[0] -- height</dt> 805 * <dd>The vertical axis</dd> 806 * <dt>selectedIndex[1] -- width</dt> 807 * <dd>The horizontal axis</dd> 808 * <dt>selectedIndex[2] -- depth</dt> 809 * <dd>The depth axis, which is used for 3 or more dimension datasets.</dd> 810 * </dl> 811 * Applications can use getSelectedIndex() to access and change the display 812 * order. For example, in a 2D dataset of 200x50 (dim0=200, dim1=50), the 813 * following code will set the height=200 and width=100. 814 * 815 * <pre> 816 * long[] selectedIndex = dataset.getSelectedIndex(); 817 * selectedIndex[0] = 0; 818 * selectedIndex[1] = 1; 819 * </pre> 820 * 821 * @see #getSelectedIndex() 822 * @see #getHeight() 823 * 824 * @return the size of dimension of the horizontal axis. 825 */ 826 public final int getWidth() { 827 if (rank < 0) init(); 828 829 if ((selectedDims == null) || (selectedIndex == null)) { 830 return 0; 831 } 832 833 if ((selectedDims.length < 2) || (selectedIndex.length < 2)) { 834 return 1; 835 } 836 837 return (int) selectedDims[selectedIndex[1]]; 838 } 839 840 /** 841 * Returns the indices of display order. 842 * <p> 843 * 844 * selectedIndex[] is provided for two purpose: 845 * <OL> 846 * <LI> 847 * selectedIndex[] is used to indicate the order of dimensions for display. 848 * selectedIndex[0] is for the row, selectedIndex[1] is for the column and 849 * selectedIndex[2] for the depth. 850 * <p> 851 * For example, for a four dimesion dataset, if selectedIndex[] = {1, 2, 3}, 852 * then dim[1] is selected as row index, dim[2] is selected as column index 853 * and dim[3] is selected as depth index. 854 * <LI> 855 * selectedIndex[] is also used to select dimensions for display for 856 * datasets with three or more dimensions. 
We assume that applications such 857 * as HDFView can only display data values up to three dimension (2D 858 * spreadsheet/image with a third dimension which the 2D spreadsheet/image 859 * is selected from). For dataset with more than three dimensions, we need 860 * selectedIndex[] to tell applications which three dimensions are chosen 861 * for display. <br> 862 * For example, for a four dimesion dataset, if selectedIndex[] = {1, 2, 3}, 863 * then dim[1] is selected as row index, dim[2] is selected as column index 864 * and dim[3] is selected as depth index. dim[0] is not selected. Its 865 * location is fixed at 0 by default. 866 * </OL> 867 * 868 * @return the array of the indices of display order. 869 */ 870 public final int[] getSelectedIndex() { 871 if (rank < 0) init(); 872 873 return selectedIndex; 874 } 875 876 /** 877 * Returns the string representation of compression information. 878 * <p> 879 * For example, 880 * "SZIP: Pixels per block = 8: H5Z_FILTER_CONFIG_DECODE_ENABLED". 881 * 882 * @return the string representation of compression information. 883 */ 884 public final String getCompression() { 885 if (rank < 0) init(); 886 887 return compression; 888 } 889 890 /** 891 * Returns the string representation of filter information. 892 * 893 * @return the string representation of filter information. 894 */ 895 public final String getFilters() { 896 if (rank < 0) init(); 897 898 return filters; 899 } 900 901 /** 902 * Returns the string representation of storage information. 903 * 904 * @return the string representation of storage information. 905 */ 906 public final String getStorage() { 907 if (rank < 0) init(); 908 909 return storage; 910 } 911 912 /** 913 * Returns the array that contains the dimension sizes of the chunk of the 914 * dataset. Returns null if the dataset is not chunked. 915 * 916 * @return the array of chunk sizes or returns null if the dataset is not 917 * chunked. 
918 */ 919 public final long[] getChunkSize() { 920 if (rank < 0) init(); 921 922 return chunkSize; 923 } 924 925 /** 926 * @deprecated Not for public use in the future. <br> 927 * Using {@link #convertFromUnsignedC(Object, Object)} 928 */ 929 @Deprecated 930 public static Object convertFromUnsignedC(Object data_in) { 931 return Dataset.convertFromUnsignedC(data_in, null); 932 } 933 934 /** 935 * Converts one-dimension array of unsigned C-type integers to a new array 936 * of appropriate Java integer in memory. 937 * <p> 938 * Since Java does not support unsigned integer, values of unsigned C-type 939 * integers must be converted into its appropriate Java integer. Otherwise, 940 * the data value will not displayed correctly. For example, if an unsigned 941 * C byte, x = 200, is stored into an Java byte y, y will be -56 instead of 942 * the correct value of 200. 943 * <p> 944 * Unsigned C integers are upgrade to Java integers according to the 945 * following table: 946 * <TABLE CELLSPACING=0 BORDER=1 CELLPADDING=5 WIDTH=400> 947 * <caption><b>Mapping Unsigned C Integers to Java Integers</b></caption> 948 * <TR> 949 * <TD><B>Unsigned C Integer</B></TD> 950 * <TD><B>JAVA Intege</B>r</TD> 951 * </TR> 952 * <TR> 953 * <TD>unsigned byte</TD> 954 * <TD>signed short</TD> 955 * </TR> 956 * <TR> 957 * <TD>unsigned short</TD> 958 * <TD>signed int</TD> 959 * </TR> 960 * <TR> 961 * <TD>unsigned int</TD> 962 * <TD>signed long</TD> 963 * </TR> 964 * <TR> 965 * <TD>unsigned long</TD> 966 * <TD>signed long</TD> 967 * </TR> 968 * </TABLE> 969 * <strong>NOTE: this conversion cannot deal with unsigned 64-bit integers. 970 * Therefore, the values of unsigned 64-bit dataset may be wrong in Java 971 * application</strong>. 972 * <p> 973 * If memory data of unsigned integers is converted by 974 * convertFromUnsignedC(), convertToUnsignedC() must be called to convert 975 * the data back to unsigned C before data is written into file. 
976 * 977 * @see #convertToUnsignedC(Object, Object) 978 * 979 * @param data_in 980 * the input 1D array of the unsigned C-type integers. 981 * @param data_out 982 * the output converted (or upgraded) 1D array of Java integers. 983 * 984 * @return the upgraded 1D array of Java integers. 985 */ 986 public static Object convertFromUnsignedC(Object data_in, Object data_out) { 987 if (data_in == null) { 988 return null; 989 } 990 991 Class data_class = data_in.getClass(); 992 if (!data_class.isArray()) { 993 return null; 994 } 995 996 if (data_out != null) { 997 Class data_class_out = data_out.getClass(); 998 if (!data_class_out.isArray() || (Array.getLength(data_in) != Array.getLength(data_out))) { 999 data_out = null; 1000 } 1001 } 1002 1003 String cname = data_class.getName(); 1004 char dname = cname.charAt(cname.lastIndexOf("[") + 1); 1005 int size = Array.getLength(data_in); 1006 log.trace("convertFromUnsignedC: cname={} dname={} size={}", cname, dname, size); 1007 1008 if (dname == 'B') { 1009 short[] sdata = null; 1010 if (data_out == null) { 1011 sdata = new short[size]; 1012 } 1013 else { 1014 sdata = (short[]) data_out; 1015 } 1016 1017 byte[] bdata = (byte[]) data_in; 1018 for (int i = 0; i < size; i++) { 1019 sdata[i] = (short) ((bdata[i] + 256) & 0xFF); 1020 } 1021 1022 data_out = sdata; 1023 } 1024 else if (dname == 'S') { 1025 int[] idata = null; 1026 if (data_out == null) { 1027 idata = new int[size]; 1028 } 1029 else { 1030 idata = (int[]) data_out; 1031 } 1032 1033 short[] sdata = (short[]) data_in; 1034 for (int i = 0; i < size; i++) { 1035 idata[i] = (sdata[i] + 65536) & 0xFFFF; 1036 } 1037 1038 data_out = idata; 1039 } 1040 else if (dname == 'I') { 1041 long[] ldata = null; 1042 if (data_out == null) { 1043 ldata = new long[size]; 1044 } 1045 else { 1046 ldata = (long[]) data_out; 1047 } 1048 1049 int[] idata = (int[]) data_in; 1050 for (int i = 0; i < size; i++) { 1051 ldata[i] = (idata[i] + 4294967296L) & 0xFFFFFFFFL; 1052 } 1053 1054 data_out = 
ldata; 1055 } 1056 else { 1057 data_out = data_in; 1058 log.debug("convertFromUnsignedC: Java does not support unsigned long"); 1059 } 1060 1061 return data_out; 1062 } 1063 1064 /** 1065 * @deprecated Not for public use in the future. <br> 1066 * Using {@link #convertToUnsignedC(Object, Object)} 1067 */ 1068 @Deprecated 1069 public static Object convertToUnsignedC(Object data_in) { 1070 return Dataset.convertToUnsignedC(data_in, null); 1071 } 1072 1073 /** 1074 * Converts the array of converted unsigned integer back to unsigned C-type 1075 * integer data in memory. 1076 * <p> 1077 * If memory data of unsigned integers is converted by 1078 * convertFromUnsignedC(), convertToUnsignedC() must be called to convert 1079 * the data back to unsigned C before data is written into file. 1080 * 1081 * @see #convertFromUnsignedC(Object, Object) 1082 * 1083 * @param data_in 1084 * the input array of the Java integer. 1085 * @param data_out 1086 * the output array of the unsigned C-type integer. 1087 * 1088 * @return the converted data of unsigned C-type integer array. 
1089 */ 1090 public static Object convertToUnsignedC(Object data_in, Object data_out) { 1091 if (data_in == null) { 1092 return null; 1093 } 1094 1095 Class data_class = data_in.getClass(); 1096 if (!data_class.isArray()) { 1097 return null; 1098 } 1099 1100 if (data_out != null) { 1101 Class data_class_out = data_out.getClass(); 1102 if (!data_class_out.isArray() || (Array.getLength(data_in) != Array.getLength(data_out))) { 1103 data_out = null; 1104 } 1105 } 1106 1107 String cname = data_class.getName(); 1108 char dname = cname.charAt(cname.lastIndexOf("[") + 1); 1109 int size = Array.getLength(data_in); 1110 log.trace("convertToUnsignedC: cname={} dname={} size={}", cname, dname, size); 1111 1112 if (dname == 'S') { 1113 byte[] bdata = null; 1114 if (data_out == null) { 1115 bdata = new byte[size]; 1116 } 1117 else { 1118 bdata = (byte[]) data_out; 1119 } 1120 short[] sdata = (short[]) data_in; 1121 for (int i = 0; i < size; i++) { 1122 bdata[i] = (byte) sdata[i]; 1123 } 1124 data_out = bdata; 1125 } 1126 else if (dname == 'I') { 1127 short[] sdata = null; 1128 if (data_out == null) { 1129 sdata = new short[size]; 1130 } 1131 else { 1132 sdata = (short[]) data_out; 1133 } 1134 int[] idata = (int[]) data_in; 1135 for (int i = 0; i < size; i++) { 1136 sdata[i] = (short) idata[i]; 1137 } 1138 data_out = sdata; 1139 } 1140 else if (dname == 'J') { 1141 int[] idata = null; 1142 if (data_out == null) { 1143 idata = new int[size]; 1144 } 1145 else { 1146 idata = (int[]) data_out; 1147 } 1148 long[] ldata = (long[]) data_in; 1149 for (int i = 0; i < size; i++) { 1150 idata[i] = (int) ldata[i]; 1151 } 1152 data_out = idata; 1153 } 1154 else { 1155 data_out = data_in; 1156 log.debug("convertToUnsignedC: Java does not support unsigned long"); 1157 } 1158 1159 return data_out; 1160 } 1161 1162 /** 1163 * Converts an array of bytes into an array of Strings for a fixed string 1164 * dataset. 1165 * <p> 1166 * A C-string is an array of chars while an Java String is an object. 
When a 1167 * string dataset is read into Java application, the data is stored in an 1168 * array of Java bytes. byteToString() is used to convert the array of bytes 1169 * into array of Java strings so that applications can display and modify 1170 * the data content. 1171 * <p> 1172 * For example, the content of a two element C string dataset is {"ABC", 1173 * "abc"}. Java applications will read the data into an byte array of {65, 1174 * 66, 67, 97, 98, 99). byteToString(bytes, 3) returns an array of Java 1175 * String of strs[0]="ABC", and strs[1]="abc". 1176 * <p> 1177 * If memory data of strings is converted to Java Strings, stringToByte() 1178 * must be called to convert the memory data back to byte array before data 1179 * is written to file. 1180 * 1181 * @see #stringToByte(String[], int) 1182 * 1183 * @param bytes 1184 * the array of bytes to convert. 1185 * @param length 1186 * the length of string. 1187 * 1188 * @return the array of Java String. 1189 */ 1190 public static final String[] byteToString(byte[] bytes, int length) { 1191 if (bytes == null) { 1192 return null; 1193 } 1194 1195 int n = bytes.length / length; 1196 log.trace("byteToString: n={} from length of {}", n, length); 1197 // String bigstr = new String(bytes); 1198 String[] strArray = new String[n]; 1199 String str = null; 1200 int idx = 0; 1201 for (int i = 0; i < n; i++) { 1202 str = new String(bytes, i * length, length); 1203 // bigstr.substring uses less memory space 1204 // NOTE: bigstr does not work on linux if bytes.length is very large 1205 // see bug 1091 1206 // offset = i*length; 1207 // str = bigstr.substring(offset, offset+length); 1208 1209 idx = str.indexOf('\0'); 1210 if (idx > 0) { 1211 str = str.substring(0, idx); 1212 } 1213 1214 // trim only the end 1215 int end = str.length(); 1216 while (end > 0 && str.charAt(end - 1) <= '\u0020') 1217 end--; 1218 1219 strArray[i] = (end <= 0) ? 
"" : str.substring(0, end); 1220 1221 // trim both start and end 1222 // strArray[i] = str.trim(); 1223 } 1224 1225 return strArray; 1226 } 1227 1228 /** 1229 * Converts a string array into an array of bytes for a fixed string 1230 * dataset. 1231 * <p> 1232 * If memory data of strings is converted to Java Strings, stringToByte() 1233 * must be called to convert the memory data back to byte array before data 1234 * is written to file. 1235 * 1236 * @see #byteToString(byte[] bytes, int length) 1237 * 1238 * @param strings 1239 * the array of string. 1240 * @param length 1241 * the length of string. 1242 * 1243 * @return the array of bytes. 1244 */ 1245 public static final byte[] stringToByte(String[] strings, int length) { 1246 if (strings == null) { 1247 return null; 1248 } 1249 1250 int size = strings.length; 1251 byte[] bytes = new byte[size * length]; 1252 log.trace("stringToByte: size={} length={}", size, length); 1253 StringBuffer strBuff = new StringBuffer(length); 1254 for (int i = 0; i < size; i++) { 1255 // initialize the string with spaces 1256 strBuff.replace(0, length, " "); 1257 1258 if (strings[i] != null) { 1259 if (strings[i].length() > length) { 1260 strings[i] = strings[i].substring(0, length); 1261 } 1262 strBuff.replace(0, length, strings[i]); 1263 } 1264 1265 strBuff.setLength(length); 1266 System.arraycopy(strBuff.toString().getBytes(), 0, bytes, length * i, length); 1267 } 1268 1269 return bytes; 1270 } 1271 1272 /** 1273 * Returns the array of strings that represent the dimension names. Returns 1274 * null if there is no dimension name. 1275 * <p> 1276 * Some datasets have pre-defined names for each dimension such as 1277 * "Latitude" and "Longitude". getDimNames() returns these pre-defined 1278 * names. 1279 * 1280 * @return the names of dimensions, or null if there is no dimension name. 
*/
    public final String[] getDimNames() {
        // init() is run first while the rank is still negative.
        if (rank < 0) {
            init();
        }

        return dimNames;
    }

    /**
     * Checks if a given datatype is a string. Sub-classes must replace this
     * default implementation, which always returns false.
     *
     * @param tid
     *            The data type identifier.
     *
     * @return true if the datatype is a string; otherwise returns false.
     */
    public boolean isString(int tid) {
        return false;
    }

    /**
     * Returns the size in bytes of a given datatype. Sub-classes must replace
     * this default implementation, which always returns -1.
     *
     * @param tid
     *            The data type identifier.
     *
     * @return The size of the datatype
     */
    public int getSize(int tid) {
        return -1;
    }

    /**
     * Get flag that indicates if enum data is converted to strings.
     *
     * @return the enumConverted
     */
    public boolean isEnumConverted() {
        return enumConverted;
    }

    /**
     * Set flag that indicates if enum data is converted to strings.
     * <p>
     * When the flag actually changes, the original and converted buffers are
     * dropped and clearData() is called.
     *
     * @param b
     *            the enumConverted to set
     */
    public void setEnumConverted(boolean b) {
        if (enumConverted != b) {
            originalBuf = convertedBuf = null;
            this.clearData();
        }

        enumConverted = b;
    }

    /**
     * Get Class of the original data buffer if converted.
     *
     * @return the Class of originalBuf, or null when there is no original
     *         buffer (for example after setEnumConverted() has reset it).
     */
    public final Class getOriginalClass() {
        // originalBuf can be null (setEnumConverted() clears it), so guard the
        // dereference instead of failing with a NullPointerException.
        return (originalBuf == null) ? null : originalBuf.getClass();
    }
}