degu.degudocumentbuilder.ejb
Class PageTokenizer

java.lang.Object
  extended by org.apache.lucene.analysis.TokenStream
      extended by org.apache.lucene.analysis.Tokenizer
          extended by degu.degudocumentbuilder.ejb.DeguDocTokenizer
              extended by degu.degudocumentbuilder.ejb.PageTokenizer

public class PageTokenizer
extends DeguDocTokenizer

This tokenizer emits raw text between tags


Field Summary
private  int curPage
           
private  boolean finalPageEmitted
           
(package private)  java.util.Vector<java.lang.Object> pageContents
          Holds the content objects belonging to a single page
private  int parsedTextPtr
          Index of the element within the JDOM objects
private  org.jdom.input.SAXBuilder saxBuilder
           
 
Fields inherited from class degu.degudocumentbuilder.ejb.DeguDocTokenizer
jdomDocument
 
Fields inherited from class org.apache.lucene.analysis.Tokenizer
input
 
Constructor Summary
PageTokenizer(org.jdom.Document jdomDocument)
           
PageTokenizer(java.io.Reader input)
           
 
Method Summary
 org.apache.lucene.analysis.Token next()
           
 
Methods inherited from class org.apache.lucene.analysis.Tokenizer
close
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

saxBuilder

private org.jdom.input.SAXBuilder saxBuilder

parsedTextPtr

private int parsedTextPtr
Index of the element within the JDOM objects


curPage

private int curPage

finalPageEmitted

private boolean finalPageEmitted

pageContents

java.util.Vector<java.lang.Object> pageContents
Holds the content objects belonging to a single page

Constructor Detail

PageTokenizer

public PageTokenizer(java.io.Reader input)

PageTokenizer

public PageTokenizer(org.jdom.Document jdomDocument)
Method Detail

next

public org.apache.lucene.analysis.Token next()
                                      throws java.io.IOException
Specified by:
next in class org.apache.lucene.analysis.TokenStream
Throws:
java.io.IOException