public abstract class AbstractTextEmbeddingIterator extends AbstractSequenceInstanceIterator
| Modifier and Type | Field and Description |
|---|---|
| org.deeplearning4j.models.embeddings.wordvectors.WordVectors | wordVectors - Loaded word vectors |
| Constructor and Description |
|---|
| AbstractTextEmbeddingIterator() |
| Modifier and Type | Method and Description |
|---|---|
| org.deeplearning4j.iterator.LabeledSentenceProvider | getSentenceProvider(weka.core.Instances data) - Create a sentence provider from the given data. |
| Dl4jAbstractStopwords | getStopwords() |
| TokenizerFactory | getTokenizerFactory() |
| TokenPreProcess | getTokenPreProcess() |
| int | getTruncateLength() |
| java.io.File | getWordVectorLocation() |
| org.deeplearning4j.models.embeddings.wordvectors.WordVectors | getWordVectors() |
| void | initialize() - Initialize the iterator |
| void | initWordVectors() - Initialize the word vectors from the given file |
| void | setStopwords(Dl4jAbstractStopwords stopwords) |
| void | setTokenizerFactory(TokenizerFactory tokenizerFactory) |
| void | setTokenPreProcess(TokenPreProcess tokenPreProcess) |
| void | setTruncateLength(int truncateLength) |
| void | setWordVectorLocation(java.io.File file) - Set the word vector location and try to initialize it |
| void | setWordVectors(org.deeplearning4j.models.embeddings.wordvectors.WordVectors wordVectors) |
getDataSetIterator, getDataSetIterator, getOptions, getTrainBatchSize, listOptions, setOptions, setTrainBatchSize, validate
public transient org.deeplearning4j.models.embeddings.wordvectors.WordVectors wordVectors
public void initWordVectors()
@OptionMetadata(displayName="truncation length",
description="The maximum number of tokens per document (default = 100).",
commandLineParamName="truncationLength",
commandLineParamSynopsis="-truncationLength <int>",
displayOrder=2)
public int getTruncateLength()
public void setTruncateLength(int truncateLength)
@OptionMetadata(displayName="location of word vectors",
description="The word vectors location.",
commandLineParamName="wordVectorLocation",
commandLineParamSynopsis="-wordVectorLocation <string>",
displayOrder=3)
public java.io.File getWordVectorLocation()
public void setWordVectorLocation(java.io.File file)
file - Word vector location
@OptionMetadata(displayName="token pre processor",
description="The token pre processor.",
commandLineParamName="tokenPreProcessor",
commandLineParamSynopsis="-tokenPreProcessor <string>",
displayOrder=4)
public TokenPreProcess getTokenPreProcess()
public void setTokenPreProcess(TokenPreProcess tokenPreProcess)
@OptionMetadata(displayName="tokenizer factory",
description="The tokenizer factory.",
commandLineParamName="tokenizerFactory",
commandLineParamSynopsis="-tokenizerFactory <string>",
displayOrder=5)
public TokenizerFactory getTokenizerFactory()
public void setTokenizerFactory(TokenizerFactory tokenizerFactory)
@OptionMetadata(displayName="stop words",
description="The stop words to use.",
commandLineParamName="stopWords",
commandLineParamSynopsis="-stopWords <string>",
displayOrder=5)
public Dl4jAbstractStopwords getStopwords()
public void setStopwords(Dl4jAbstractStopwords stopwords)
@ProgrammaticProperty public org.deeplearning4j.models.embeddings.wordvectors.WordVectors getWordVectors()
@ProgrammaticProperty public void setWordVectors(org.deeplearning4j.models.embeddings.wordvectors.WordVectors wordVectors)
public void initialize()
AbstractInstanceIterator
initialize in class AbstractInstanceIterator
public org.deeplearning4j.iterator.LabeledSentenceProvider getSentenceProvider(weka.core.Instances data)
data - Data