Changeset 5435


Ignore:
Timestamp:
09/03/09 18:34:14 (11 years ago)
Author:
curtis
Message:

Add RandomAccessInputStream.findString methods for efficiently
searching an input stream for a terminating string.
RandomAccessInputStream.readString(String) now delegates to findString;
the new implementation has not been extensively tested, but initial testing looks good.

Location:
trunk/components
Files:
1 added
4 edited
2 moved

Legend:

Unmodified
Added
Removed
  • trunk/components/bio-formats/src/loci/formats/in/ICSReader.java

    r5416 r5435  
    5454 
    5555  // -- Constants -- 
     56 
     57  /** Newline characters. */ 
     58  public static final String NL = "\r\n"; 
    5659 
    5760  public static final String[] DATE_FORMATS = { 
     
    284287    RandomAccessInputStream reader = new RandomAccessInputStream(icsId); 
    285288    reader.seek(0); 
    286     reader.readString("\r\n"); 
    287     String line = reader.readString("\r\n"); 
     289    reader.readString(NL); 
     290    String line = reader.readString(NL); 
    288291    boolean signed = false; 
    289292 
     
    518521        } 
    519522      } 
    520       line = reader.readString("\r\n"); 
     523      line = reader.readString(NL); 
    521524    } 
    522525    reader.close(); 
     
    604607 
    605608    if (versionTwo) { 
    606       String s = in.readString("\r\n"); 
    607       while (!s.trim().equals("end")) s = in.readString("\r\n"); 
     609      String s = in.readString(NL); 
     610      while (!s.trim().equals("end")) s = in.readString(NL); 
    608611    } 
    609612 
  • trunk/components/common/src/loci/common/RandomAccessInputStream.java

    r5408 r5435  
    3131import java.io.IOException; 
    3232import java.io.InputStream; 
     33import java.io.InputStreamReader; 
    3334import java.util.Hashtable; 
    3435 
     
    4344 * 
    4445 * @author Melissa Linkert linkert at wisc.edu 
     46 * @author Curtis Rueden ctrueden at wisc.edu 
    4547 */ 
    4648public class RandomAccessInputStream extends InputStream implements DataInput { 
     
    5052  /** Maximum size of the buffer used by the DataInputStream. */ 
    5153  protected static final int MAX_OVERHEAD = 1048576; 
     54 
     55  /** 
     56   * Block size to use when searching through the stream. 
     57   * This value should not exceed MAX_OVERHEAD! 
     58   */ 
     59  protected static final int DEFAULT_BLOCK_SIZE = 256 * 1024; // 256 KB 
     60 
     61  /** Maximum number of bytes to search when searching through the stream. */ 
     62  protected static final int MAX_SEARCH_SIZE = 512 * 1024 * 1024; // 512 MB 
    5263 
    5364  /** Maximum number of open files. */ 
     
    186197  /** 
    187198   * Reads a string ending with one of the characters in the given string. 
    188    * @see readCString() 
    189    * @see readLine() 
     199   * 
     200   * @see findString(String) 
    190201   */ 
    191202  public String readString(String lastChars) throws IOException { 
    192     StringBuffer sb = new StringBuffer(); 
    193     char c = readChar(); 
    194     while (lastChars.indexOf(c) == -1 && getFilePointer() < length()) { 
    195       sb = sb.append(c); 
    196       c = readChar(); 
    197     } 
    198     return sb.toString(); 
     203    String[] terminators = new String[lastChars.length()]; 
     204    for (int i=0; i<terminators.length; i++) { 
     205      terminators[i] = lastChars.substring(i, i + 1); 
     206    } 
     207    return findString(terminators); 
     208  } 
     209 
     210  /** 
     211   * Reads a string ending with one of the given terminating substrings. 
     212   * 
     213   * @param terminators The strings for which to search. 
     214   * 
     215   * @return The string from the initial position through the end of the 
     216   *   terminating sequence, or through the end of the stream if no 
     217   *   terminating sequence is found. 
     218   */ 
     219  public String findString(String... terminators) throws IOException { 
     220    return findString(DEFAULT_BLOCK_SIZE, terminators); 
     221  } 
     222 
     223  /** 
     224   * Reads a string ending with one of the given terminating substrings, 
     225   * using the specified block size for buffering. 
     226   * 
     227   * @param terminators The strings for which to search. 
     228   * @param blockSize The block size to use when reading bytes in chunks. 
     229   * 
     230   * @throws IOException If the maximum search length (512 MB) is exceeded. 
     231   * 
     232   * @return The string from the initial position through the end of the 
     233   *   terminating sequence, or through the end of the stream if no 
     234   *   terminating sequence is found. 
     235   */ 
     236  public String findString(int blockSize, String... terminators) 
     237    throws IOException 
     238  { 
     239    StringBuilder out = new StringBuilder(); 
     240    long startPos = getFilePointer(); 
     241    long inputLen = length(); 
     242    long maxLen = inputLen - startPos; 
     243    if (maxLen > MAX_SEARCH_SIZE) maxLen = MAX_SEARCH_SIZE; 
     244    boolean match = false; 
     245 
     246    InputStreamReader in = new InputStreamReader(this); 
     247    char[] buf = new char[blockSize]; 
     248    int i = 0; 
     249    while (i < maxLen) { 
     250      long pos = startPos + i; 
     251      int num = blockSize; 
     252      if (pos + blockSize > inputLen) num = (int) (inputLen - pos); 
     253 
     254      // read block from stream 
     255      int r = in.read(buf, 0, blockSize); 
     256      if (r <= 0) throw new IOException("Cannot read from stream: " + r); 
     257 
     258      // append block to output 
     259      out.append(buf, 0, r); 
     260 
     261      // check output 
     262      for (String term : terminators) { 
     263        int tagLen = term.length(); 
     264        int index = out.indexOf(term, i == 0 ? 0 : i - tagLen); 
     265        if (index >= 0) { 
     266          match = true; 
     267          seek(index + tagLen); // reset input stream to proper location 
     268          out.setLength(index + tagLen); // trim output 
     269          break; 
     270        } 
     271      } 
     272      if (match) break; 
     273      i += r; 
     274    } 
     275 
     276    if (!match) throw new IOException("Maximum search length reached."); 
     277 
     278    return out.toString(); 
    199279  } 
    200280 
     
    249329  /** Read the next line of text from the input stream. */ 
    250330  public String readLine() throws IOException { 
    251     return readString("\n"); 
     331    return findString("\n"); 
    252332  } 
    253333 
    254334  /** Read a string of arbitrary length, terminated by a null char. */ 
    255335  public String readCString() throws IOException { 
    256     return readString("\0"); 
     336    return findString("\0"); 
    257337  } 
    258338 
     
    260340  public String readString(int n) throws IOException { 
    261341    byte[] b = new byte[n]; 
    262     read(b); 
     342    readFully(b); 
    263343    return new String(b); 
    264344  } 
  • trunk/components/test-suite/bftest

    r3899 r5435  
    1313DIRS=`du -s $DIR/* | sort -n | sed -e 's/^[0-9]*\s*//g'` 
    1414cd ~/svn/java 
     15echo --------=========== Updating source code ===========-------- 
     16svn up 
     17ant jars 
     18cd components/test-suite 
    1519for f in $DIRS 
    1620do 
     
    2327  then 
    2428    echo --------=========== Testing $f ===========-------- 
    25     svn up 
    2629    ant -Dtestng.directory="$f" test-all 
    2730  fi 
  • trunk/components/test-suite/build.properties

    r5043 r5435  
    1111                           ${artifact.dir}/bio-formats.jar:\ 
    1212                           ${lib.dir}/testng-5.7-jdk14.jar 
    13 component.java-version   = 1.4 
     13component.java-version   = 1.5 
    1414component.deprecation    = true 
    1515 
  • trunk/components/test-suite/src/loci/tests/IOTester.java

    r5419 r5435  
    6565    int rest = middle + TAG.length(); 
    6666 
    67     LogTools.println("Generating data: " + middle + " alphanumeric + " + 
    68       TAG.length() + " divider tag + " + left + " binary"); 
     67    long middlePercent = 100L * middle / SIZE; 
     68    long leftPercent = 100L * left / SIZE; 
     69 
     70    LogTools.println("Generating data: " + middle + " (" + middlePercent + 
     71      "%) alphanumeric + " + left + " (" + leftPercent + "%) binary"); 
    6972 
    7073    long progress = 0; 
     
    143146 
    144147    RandomAccessInputStream in = new RandomAccessInputStream(filename); 
    145     StringBuilder buffer = new StringBuilder(); 
    146     int tagLen = TAG.length(); 
    147     int inputLen = (int) in.length(); 
    148     for (int i=0; i<inputLen; i+=blockSize) { 
    149       int num = i + blockSize > inputLen ? inputLen - i : blockSize; 
    150       String s = in.readString(num); 
    151       buffer.append(s); 
    152       int match = buffer.indexOf(TAG, i == 0 ? i : i - tagLen); 
    153       if (match >= 0) { 
    154         in.seek(match + tagLen); 
    155         break; 
    156       } 
    157     } 
    158     long offset = in.getFilePointer(); 
     148    long offset = in.findString(blockSize, TAG).length(); 
    159149    in.close(); 
    160150 
Note: See TracChangeset for help on using the changeset viewer.