001 package org.LiveGraph.dataFile.read;
002
003 import java.io.BufferedReader;
004 import java.io.Closeable;
005 import java.io.IOException;
006 import java.io.InputStream;
007 import java.io.InputStreamReader;
008 import java.util.ArrayList;
009 import java.util.Collections;
010 import java.util.HashMap;
011 import java.util.List;
012 import java.util.Map;
013
014 import org.LiveGraph.dataFile.common.DataFormatException;
015
016 import com.softnetConsult.utils.exceptions.Bug;
017
018
019 import static org.LiveGraph.dataFile.common.DataFormatTools.*;
020
021
022 /**
023 * A reader for a data stream (usually, a CSV file). This reader
024 * will parse the data stream and extract the file information, the data
025 * series headings and the actual data.<br />
026 * <br />
027 * The information extracted from the data stream is passed to the application
028 * using an observer pattern: after a line was parsed, the appropriate
029 * {@code notifyXXXX(...)}-method of this class is called with the extracted
030 * information. The {@code notifyXXXX(...)}-methods dispatch appropriate
031 * notifications to all {@link DataStreamObserver}-objects registered with this
032 * {@code DataStreamReader}-instance.<br />
 * If required, an application may also override the {@code notifyXXXX(...)}-methods
 * in a subclass in order to handle data read events directly.<br />
035 * <br />
036 * See {@link org.LiveGraph.dataFile.write.DataStreamWriter} for the details of the
037 * data file format.<br />
038 * <br />
 * Note that this class has a different role than it had in version 1.01 of the
 * LiveGraph API. The {@code DataStreamReader} class from version 1.01 has been replaced by
 * {@link org.LiveGraph.dataCache.DataStreamToCacheReader}.
042 *
043 * <p><strong>LiveGraph</strong> (http://www.live-graph.org).</p>
044 * <p>Copyright (c) 2007 by G. Paperin.</p>
045 * <p>File: DataStreamReader.java</p>
046 * <p style="font-size:smaller;">Redistribution and use in source and binary forms, with or
047 * without modification, are permitted provided that the following terms and conditions are met:
048 * </p>
049 * <p style="font-size:smaller;">1. Redistributions of source code must retain the above
050 * acknowledgement of the LiveGraph project and its web-site, the above copyright notice,
051 * this list of conditions and the following disclaimer.<br />
052 * 2. Redistributions in binary form must reproduce the above acknowledgement of the
053 * LiveGraph project and its web-site, the above copyright notice, this list of conditions
054 * and the following disclaimer in the documentation and/or other materials provided with
055 * the distribution.<br />
056 * 3. All advertising materials mentioning features or use of this software or any derived
057 * software must display the following acknowledgement:<br />
058 * <em>This product includes software developed by the LiveGraph project and its
059 * contributors.<br />(http://www.live-graph.org)</em><br />
060 * 4. All advertising materials distributed in form of HTML pages or any other technology
061 * permitting active hyper-links that mention features or use of this software or any
062 * derived software must display the acknowledgment specified in condition 3 of this
063 * agreement, and in addition, include a visible and working hyper-link to the LiveGraph
064 * homepage (http://www.live-graph.org).
065 * </p>
066 * <p style="font-size:smaller;">THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY
067 * OF ANY KIND, EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
068 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
069 * THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
070 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
071 * IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
072 * </p>
073 *
074 * @author Greg Paperin (http://www.paperin.org)
075 * @version {@value org.LiveGraph.LiveGraph#version}
076 * @see DataStreamObserver
077 * @see DataStreamObserverAdapter
078 * @see org.LiveGraph.dataCache.DataStreamToCacheReader
079 */
080 public class DataStreamReader implements Closeable {
081
082 /**
083 * Data stream reader.
084 */
085 private BufferedReader in = null;
086
087 /**
088 * Data values separator.
089 */
090 private String separator = DefaultSeparator;
091
092 /**
093 * Whether the data values separator was already finalised.
094 */
095 private boolean separatorSet = false;
096
097 /**
098 * Whether the data series headings are already set-up.
099 */
100 private boolean labelsSet = false;
101
102 /**
103 * The data stream index of the next data record.
104 */
105 private int nextDatasetFileIndex = -1;
106
107 /**
108 * Observers who want to know what's on the data stream.
109 */
110 private List<DataStreamObserver> observers = null;
111
112
113 /**
114 * Creates a data reader on the specified stream.
115 *
116 * @param is The stream from which to read.
117 */
118 public DataStreamReader(InputStream is) {
119
120 if (null == is)
121 throw new NullPointerException("Cannot read from a null stream.");
122
123 this.in = new BufferedReader(new InputStreamReader(is));
124 this.separator = DefaultSeparator;
125 this.separatorSet = false;
126 this.labelsSet = false;
127 this.nextDatasetFileIndex = -1;
128 this.observers = new ArrayList<DataStreamObserver>();
129 }
130
131 /**
132 * Creates a data reader on the specified stream and add one initial observer.
133 *
134 * @param is The stream from which to read.
135 * @param observer An observer for the data stream contents.
136 */
137 public DataStreamReader(InputStream is, DataStreamObserver observer) {
138 this(is);
139 addObserver(observer);
140 }
141
142
143 /**
144 * Tells whether this reader's underlying data stream is ready to be read.
145 *
146 * @return {@code true} if the next {@code readFromStream()} is guaranteed not to block for input,
147 * {@code false} otherwise. Note that returning {@code false} does not guarantee that the next read
148 * will block.
149 * @throws IOException If an I/O error occurs.
150 */
151 public boolean ready() throws IOException {
152 return in.ready();
153 }
154
155 /**
156 * Closes the underlying data stream. Further reading is not possible after calling this method.
157 * @throws IOException If an I/O error occurs.
158 */
159 public void close() throws IOException {
160 in.close();
161 }
162
163 /**
164 * Reads as many data lines from the underlying stream as there are available and parses them.
165 *
166 * @return The number on non-empty data lines read.
167 * @throws IOException If an I/O error occurs.
168 * @throws DataFormatException If the data stream contents do not conform with the expected data
169 * stream format.
170 * @see org.LiveGraph.dataFile.write.DataStreamWriter
171 * @see #readFromStream(int)
172 */
173 public int readFromStream() throws IOException, DataFormatException {
174 return readFromStream(-1);
175 }
176
177 /**
178 * Reads up to a specified number of data lines from the underlying stream, and parses the lines.
179 * Reading is stopped when the specified number of lines in reached or if no more lines are available.
180 *
181 * @param maxLines The maximum number of data lines to read (empty lines are ignored and not counted,
182 * but all other lines including comment lines are counted). If negative, all available lines will
183 * be read.
184 * @return The number on non-empty data lines read.
185 * @throws IOException If an I/O error occurs.
186 * @throws DataFormatException If the data stream contents do not conform with the expected data
187 * stream format.
188 * @see org.LiveGraph.dataFile.write.DataStreamWriter
189 */
190 public int readFromStream(int maxLines) throws IOException, DataFormatException {
191
192 int linesRead = 0;
193 String line = null;
194 while (ready() && (0 > maxLines || linesRead < maxLines) ) {
195 line = in.readLine();
196 line = line.trim();
197 if (line.length() > 0) {
198 processLine(line);
199 linesRead++;
200 }
201 }
202 return linesRead;
203 }
204
205 /**
206 * Notifies observers regestered with this parser of a "data values separator set"-event.
207 *
208 * @param separator New data separator to be passed to the observers.
209 */
210 protected void notifySeparatorSet(String separator) {
211 for (DataStreamObserver observer : observers)
212 observer.eventSeparatorSet(separator, this);
213 }
214
215 /**
216 * Notifies observers regestered with this parser of a "comment line parsed"-event.
217 *
218 * @param comment The parsed comment line to be passed to the observers.
219 */
220 protected void notifyCommentLine(String comment) {
221 for (DataStreamObserver observer : observers)
222 observer.eventCommentLine(comment, this);
223 }
224
225 /**
226 * Notifies observers regestered with this parser of a "file info line parsed"-event.
227 *
228 * @param info The parsed file info to be passed to the observers.
229 */
230 protected void notifyFileInfoLine(String info) {
231 for (DataStreamObserver observer : observers)
232 observer.eventFileInfoLine(info, this);
233 }
234
235 /**
236 * Notifies observers regestered with this parser of a "data series labels parsed"-event.
237 *
238 * @param labels The parsed data series labels to be passed to the observers.
239 */
240 protected void notifyLabelsSet(List<String> labels) {
241 for (DataStreamObserver observer : observers)
242 observer.eventLabelsSet(labels, this);
243 }
244
245 /**
246 * Notifies observers regestered with this parser of a "dataset parsed"-event.
247 *
248 * @param dataTokens The parsed data tokens to be passed to the observers.
249 * @param datasetIndex The file index of the parsed dataset to be passed to the observers.
250 */
251 protected void notifyDataLineRead(List<String> dataTokens, int datasetIndex) {
252 for (DataStreamObserver observer : observers)
253 observer.eventDataLineRead(dataTokens, datasetIndex, this);
254 }
255
256 /**
257 * Adds an observer to this parser.
258 *
259 * @param observer The observer to add.
260 * @return {@code if the specified observer cound not be added because it was already registered},
261 * {@code true otherwise}.
262 */
263 public boolean addObserver(DataStreamObserver observer) {
264 if (null == observer || hasObserver(observer))
265 return false;
266 return observers.add(observer);
267 }
268
269 /**
270 * Checks whether the specified observer is registered with this parser.
271 *
272 * @param observer An observer.
273 * @return {@code true} if the specified {@code observer} is not {@code null} and is regestered
274 * with this parser, {@code false} otherwise.
275 */
276 public boolean hasObserver(DataStreamObserver observer) {
277 if (null == observer)
278 return false;
279 return observers.contains(observer);
280 }
281
282 /**
283 * De-registeres the specified observer from this parser.
284 *
285 * @param observer An observer.
286 * @return {@code true} if the specified observer is not {@code null} and was on the
287 * list of registered observers and is now removed from this list, {@code false} otherwise.
288 */
289 public boolean removeObserver(DataStreamObserver observer) {
290 if (null == observer)
291 return false;
292 return observers.remove(observer);
293 }
294
295 /**
296 * Counts this parser's observers.
297 *
298 * @return The number of observers registered with this parser.
299 */
300 public int countObservers() {
301 return observers.size();
302 }
303
304 /**
305 * This static utility method converts a list of {@code String} tokens (presumably just parsed
306 * from a data line) to a list of {@code Double} objects containing the tokens' values; tokens
307 * that cannot be parsed to a {@code Double} are represented by {@code null}-objects in the
308 * resulting list.
309 *
310 * @param tokens A list of data tokens.
311 * @return A list of the double values of the specified tokens.
312 */
313 public static List<Double> convertTokensToDoubles(List<String> tokens) {
314
315 if (null == tokens)
316 return Collections.emptyList();
317
318 List<Double> doubles = new ArrayList<Double>(tokens.size());
319 for (String tok : tokens) {
320
321 if (null == tok)
322 continue;
323
324 tok = tok.trim();
325
326 Double val = null;
327 if (null != tok && 0 < tok.length()) {
328 try { val = Double.valueOf(tok); }
329 catch (NumberFormatException e) { val = null; }
330 }
331
332 doubles.add(val);
333 }
334 return doubles;
335 }
336
337
338 /**
339 * This static utility method converts a list of strings (presumably representing a list of
340 * labels just parsed from the data file) to a list of strings where each string is unique
341 * in respect to its {@code equals} method (case sensitive); this happens by attaching
342 * counters to repreated strings: for instance, {@code ["boo", "foo", "boo"]} it converted to
343 * {@code ["boo (1)", "foo", "boo (2)"]}.
344 *
345 * @param rawLabels The list of labels to convert.
346 * @param allowEmptyLabels If this is {@code false}, all empty strings ({@code ""}) are converted
347 * to underscores ({@code "_"}) before possibly applying the counters.
348 * @return A list of unique data series labels based on the specified list.
349 */
350 public static List<String> createUniqueLabels(List<String> rawLabels, boolean allowEmptyLabels) {
351
352 List<String> uniqueLabels = new ArrayList<String>();
353 Map<String, Integer> labelCounts = new HashMap<String, Integer>();
354
355 // Mark labels which occure more than once:
356 for (String rawLabel : rawLabels) {
357
358 rawLabel = rawLabel.trim();
359 if (!allowEmptyLabels && rawLabel.length() == 0)
360 rawLabel = "_";
361
362 if (!labelCounts.containsKey(rawLabel)) {
363
364 labelCounts.put(rawLabel, 1);
365
366 } else {
367
368 int c = labelCounts.get(rawLabel);
369 labelCounts.put(rawLabel, ++c);
370 rawLabel = rawLabel + " (" + c + ")";
371 }
372
373 uniqueLabels.add(rawLabel);
374 }
375
376 // Change first occurence of "label" into "label (1)" for the labels which appear more than once:
377 for (String label : labelCounts.keySet()) {
378 int c = labelCounts.get(label);
379 if (1 < c) {
380 int p = uniqueLabels.indexOf(label);
381 uniqueLabels.set(p, label + " (1)");
382 }
383 }
384
385 // Done:
386 return uniqueLabels;
387 }
388
389
390 /**
391 * Examines a data line and dispatches to a specialised parsing routine.
392 *
393 * @param line A data line.
394 * @throws DataFormatException If the data stream contents do not conform with the expected data
395 * stream format.
396 */
397 private void processLine(String line) throws DataFormatException {
398
399 if (!separatorSet && line.startsWith(TAGSepDefinition) && line.endsWith(TAGSepDefinition)) {
400 processSeparatorDefinitionLine(line);
401 return;
402 }
403
404 if (line.startsWith(TAGComment)) {
405 processCommentLine(line);
406 return;
407 }
408
409 if (line.startsWith(TAGFileInfo)) {
410 processFileInfoLine(line);
411 return;
412 }
413
414 if (!labelsSet) {
415 processSeriesLabelsLine(line);
416 return;
417 }
418
419 if (true) {
420 processDataLine(line);
421 return;
422 }
423
424 throw new Bug("The program should never get to this line!");
425 }
426
427 /**
428 * Parses a data values separator definition line.
429 *
430 * @param line Data line to parse.
431 * @throws DataFormatException If the data line contents are not in the expected format.
432 */
433 private void processSeparatorDefinitionLine(String line) throws DataFormatException {
434
435 if (line.length() < TAGSepDefinition.length() * 2)
436 throw new DataFormatException("Illegal separator definition: \"" + line + "\"");
437
438 if (line.length() == TAGSepDefinition.length() * 2)
439 throw new DataFormatException("Illegal separator definition: separator may not be an empty string");
440
441 String sep = line.substring(TAGSepDefinition.length(), line.length() - TAGSepDefinition.length());
442
443 String problem = isValidSeparator(sep);
444 if (null != problem)
445 throw new DataFormatException("Illegal separator definition: " + problem);
446
447 separator = sep;
448 separatorSet = true;
449 notifySeparatorSet(separator);
450 }
451
452 /**
453 * Parses a comments line.
454 *
455 * @param line Data line to parse.
456 * @throws DataFormatException If the data line contents are not in the expected format.
457 */
458 private void processCommentLine(String line) throws DataFormatException {
459 String comment = "";
460 if (line.length() > TAGComment.length())
461 comment = line.substring(TAGComment.length()).trim();
462
463 separatorSet = true;
464 notifyCommentLine(comment);
465 }
466
467 /**
468 * Parses a file information line.
469 *
470 * @param line Data line to parse.
471 * @throws DataFormatException If the data line contents are not in the expected format.
472 */
473 private void processFileInfoLine(String line) throws DataFormatException {
474 String info = "";
475 if (line.length() > TAGFileInfo.length())
476 info = line.substring(TAGFileInfo.length()).trim();
477
478 separatorSet = true;
479 notifyFileInfoLine(info);
480 }
481
482 /**
483 * Parses a data series headings line.
484 *
485 * @param line Data line to parse.
486 * @throws DataFormatException If the data line contents are not in the expected format.
487 */
488 private void processSeriesLabelsLine(String line) throws DataFormatException {
489
490 DataLineTokenizer tok = new DataLineTokenizer(line, separator);
491 nextDatasetFileIndex = 0;
492 labelsSet = true;
493 separatorSet = true;
494 notifyLabelsSet(Collections.unmodifiableList(tok.getTokens()));
495 }
496
497 /**
498 * Parses a data line.
499 *
500 * @param line Data line to parse.
501 * @throws DataFormatException If the data line contents are not in the expected format.
502 */
503 private void processDataLine(String line) throws DataFormatException {
504
505 DataLineTokenizer tok = new DataLineTokenizer(line, separator);
506 separatorSet = true;
507 notifyDataLineRead(tok.getTokens(), nextDatasetFileIndex++);
508 }
509
510 }