public abstract class AbstractTextEmbeddingIterator extends AbstractSequenceInstanceIterator
Modifier and Type | Field and Description |
---|---|
org.deeplearning4j.models.embeddings.wordvectors.WordVectors | wordVectors - Loaded word vectors |
Constructor and Description |
---|
AbstractTextEmbeddingIterator() |
Modifier and Type | Method and Description |
---|---|
org.deeplearning4j.iterator.LabeledSentenceProvider | getSentenceProvider(weka.core.Instances data) - Create a sentence provider from the given data. |
Dl4jAbstractStopwords | getStopwords() |
TokenizerFactory | getTokenizerFactory() |
TokenPreProcess | getTokenPreProcess() |
int | getTruncateLength() |
java.io.File | getWordVectorLocation() |
org.deeplearning4j.models.embeddings.wordvectors.WordVectors | getWordVectors() |
void | initialize() - Initialize the iterator |
void | initWordVectors() - Initialize the word vectors from the given file |
void | setStopwords(Dl4jAbstractStopwords stopwords) |
void | setTokenizerFactory(TokenizerFactory tokenizerFactory) |
void | setTokenPreProcess(TokenPreProcess tokenPreProcess) |
void | setTruncateLength(int truncateLength) |
void | setWordVectorLocation(java.io.File file) - Set the word vector location and try to initialize it |
void | setWordVectors(org.deeplearning4j.models.embeddings.wordvectors.WordVectors wordVectors) |
Methods inherited from superclasses: getDataSetIterator, getDataSetIterator, getOptions, getTrainBatchSize, listOptions, setOptions, setTrainBatchSize, validate
public transient org.deeplearning4j.models.embeddings.wordvectors.WordVectors wordVectors
public void initWordVectors()
@OptionMetadata(displayName="truncation length", description="The maximum number of tokens per document (default = 100).", commandLineParamName="truncationLength", commandLineParamSynopsis="-truncationLength <int>", displayOrder=2) public int getTruncateLength()
public void setTruncateLength(int truncateLength)
@OptionMetadata(displayName="location of word vectors", description="The word vectors location.", commandLineParamName="wordVectorLocation", commandLineParamSynopsis="-wordVectorLocation <string>", displayOrder=3) public java.io.File getWordVectorLocation()
public void setWordVectorLocation(java.io.File file)
file - Word vector location
@OptionMetadata(displayName="token pre processor", description="The token pre processor.", commandLineParamName="tokenPreProcessor", commandLineParamSynopsis="-tokenPreProcessor <string>", displayOrder=4) public TokenPreProcess getTokenPreProcess()
public void setTokenPreProcess(TokenPreProcess tokenPreProcess)
@OptionMetadata(displayName="tokenizer factory", description="The tokenizer factory.", commandLineParamName="tokenizerFactory", commandLineParamSynopsis="-tokenizerFactory <string>", displayOrder=5) public TokenizerFactory getTokenizerFactory()
public void setTokenizerFactory(TokenizerFactory tokenizerFactory)
@OptionMetadata(displayName="stop words", description="The stop words to use.", commandLineParamName="stopWords", commandLineParamSynopsis="-stopWords <string>", displayOrder=5) public Dl4jAbstractStopwords getStopwords()
public void setStopwords(Dl4jAbstractStopwords stopwords)
@ProgrammaticProperty public org.deeplearning4j.models.embeddings.wordvectors.WordVectors getWordVectors()
@ProgrammaticProperty public void setWordVectors(org.deeplearning4j.models.embeddings.wordvectors.WordVectors wordVectors)
public void initialize()
AbstractInstanceIterator
initialize in class AbstractInstanceIterator
public org.deeplearning4j.iterator.LabeledSentenceProvider getSentenceProvider(weka.core.Instances data)
data - Data from which the sentence provider is created