public abstract class Dl4jStringToWordEmbeddings
extends weka.filters.SimpleBatchFilter
Constructor and Description |
---|
Dl4jStringToWordEmbeddings() |
Modifier and Type | Method and Description |
---|---|
weka.filters.unsupervised.attribute.Dl4jStringToWordEmbeddings.Action |
getAction() |
weka.core.Capabilities |
getCapabilities() |
int |
getConcat_words() |
int |
getConcatWords() |
java.lang.String |
getEmbedding_prefix() |
int |
getEpochs() |
int |
getIterations() |
int |
getLayerSize() |
int |
getMinWordFrequency() |
java.lang.String[] |
getOptions() |
TokenPreProcess |
getPreProcessor() |
int |
getSeed() |
Dl4jAbstractStopwords |
getStopWordsHandler() |
int |
getTextIndex() |
TokenizerFactory |
getTokenizerFactory() |
int |
getWindowSize() |
int |
getWorkers() |
java.util.Enumeration<weka.core.Option> |
listOptions() |
void |
setAction(weka.filters.unsupervised.attribute.Dl4jStringToWordEmbeddings.Action action) |
void |
setConcat_words(int m_concat_words) |
void |
setConcatWords(int concatWords) |
void |
setEmbedding_prefix(java.lang.String embeddingPrefix) |
void |
setEpochs(int m_epochs) |
void |
setIterations(int iterations) |
void |
setLayerSize(int layerSize) |
void |
setMinWordFrequency(int minWordFrequency) |
void |
setOptions(java.lang.String[] options)
Parses the options for this object.
|
void |
setPreProcessor(TokenPreProcess value)
Sets the preprocessor action.
|
void |
setSeed(int m_seed) |
void |
setStopWordsHandler(Dl4jAbstractStopwords m_stopWordsHandler) |
void |
setTextIndex(int textIndex)
Set the attribute's index with the string to process.
|
void |
setTokenizerFactory(TokenizerFactory m_tokenizerFactory) |
void |
setWindowSize(int windowSize) |
void |
setWorkers(int m_workers) |
allowAccessToFullInputFormat, batchFinished, input, input
batchFilterFile, debugTipText, doNotCheckCapabilitiesTipText, filterFile, getCapabilities, getCopyOfInputFormat, getDebug, getDoNotCheckCapabilities, getOutputFormat, getRevision, isFirstBatchDone, isNewBatch, isOutputFormatDefined, main, makeCopies, makeCopy, mayRemoveInstanceAfterFirstBatchDone, numPendingOutput, output, outputPeek, postExecution, preExecution, run, runFilter, setDebug, setDoNotCheckCapabilities, toString, useFilter, wekaStaticWrapper
public java.util.Enumeration<weka.core.Option> listOptions()
listOptions
in interface weka.core.OptionHandler
listOptions
in class weka.filters.Filter
public java.lang.String[] getOptions()
getOptions
in interface weka.core.OptionHandler
getOptions
in class weka.filters.Filter
public void setOptions(java.lang.String[] options) throws java.lang.Exception
setOptions
in interface weka.core.OptionHandler
setOptions
in class weka.filters.Filter
options
- the options to use
java.lang.Exception
- if setting of options fails
public weka.core.Capabilities getCapabilities()
getCapabilities
in interface weka.core.CapabilitiesHandler
getCapabilities
in class weka.filters.Filter
public int getConcatWords()
public void setConcatWords(int concatWords)
@OptionMetadata(displayName="action", description="The action to perform on the embeddings: 1) report embeddings (WORD_VECTOR), 2) Average embeddings of the input string (DOC_VECTOR_AVERAGE),3) Add embeddings of the input string (DOC_VECTOR_ADD), 4) Concatenate the first *concat_words* embeddings of the input string (DOC_VECTOR_CONCAT), (default WORD_VECTOR).", commandLineParamName="action", commandLineParamSynopsis="-level <speficiation>", displayOrder=1) public weka.filters.unsupervised.attribute.Dl4jStringToWordEmbeddings.Action getAction()
public void setAction(weka.filters.unsupervised.attribute.Dl4jStringToWordEmbeddings.Action action)
@OptionMetadata(displayName="concat_words", description="Number of stopWords (from left to right) of the tweet whose embeddings will be concatenated.This parameter only applies if action=DOC_VECTOR_CONCAT (default = 15).", commandLineParamName="concat_words", commandLineParamSynopsis="-concat_words <int>", displayOrder=2) public int getConcat_words()
public void setConcat_words(int m_concat_words)
@OptionMetadata(displayName="stopWordsHandler", description="The stopWordsHandler. Dl4j Null means no stop stopWords are used.", commandLineParamName="stopWordsHandler", commandLineParamSynopsis="-stopWordsHandler <String>", displayOrder=3) public Dl4jAbstractStopwords getStopWordsHandler()
public void setStopWordsHandler(Dl4jAbstractStopwords m_stopWordsHandler)
@OptionMetadata(displayName="tokenizerFactory", description="The tokenizer factory to use on the strings. Default: DefaultTokenizer.", commandLineParamName="tokenizerFactory", commandLineParamSynopsis="-tokenizerFactory <String>", displayOrder=4) public TokenizerFactory getTokenizerFactory()
public void setTokenizerFactory(TokenizerFactory m_tokenizerFactory)
@OptionMetadata(displayName="preprocessor", description="The token Preprocessor for preprocessing the Strings. Default: CommonPreProcessor.", commandLineParamName="preprocessor", commandLineParamSynopsis="-preprocessor <String>", displayOrder=5) public TokenPreProcess getPreProcessor()
public void setPreProcessor(TokenPreProcess value)
value
- the action type
@OptionMetadata(displayName="attribute string index", description="The attribute string index (starting from 1) to process (default = 1).", commandLineParamName="index", commandLineParamSynopsis="-index <int>", displayOrder=6) public int getTextIndex()
public void setTextIndex(int textIndex)
textIndex
- the index value name
@OptionMetadata(displayName="minWordFrequency", description="The minimum word frequency (default = 5).", commandLineParamName="minWordFrequency", commandLineParamSynopsis="-minWordFrequency <int>", displayOrder=7) public int getMinWordFrequency()
public void setMinWordFrequency(int minWordFrequency)
@OptionMetadata(displayName="layerSize", description="The size of the word vectors (default = 100).", commandLineParamName="layerSize", commandLineParamSynopsis="-layerSize <int>", displayOrder=8) public int getLayerSize()
public void setLayerSize(int layerSize)
@OptionMetadata(displayName="iterations", description="The number of iterations (default = 1).", commandLineParamName="iterations", commandLineParamSynopsis="-iterations <int>", displayOrder=9) public int getIterations()
public void setIterations(int iterations)
@OptionMetadata(displayName="windowSize", description="The size of the window (default = 5).", commandLineParamName="windowSize", commandLineParamSynopsis="-windowSize <int>", displayOrder=10) public int getWindowSize()
public void setWindowSize(int windowSize)
@OptionMetadata(displayName="epochs", description="The number of epochs (iterations over whole training corpus) for training (default = 1).", commandLineParamName="epochs", commandLineParamSynopsis="-epochs <int>", displayOrder=11) public int getEpochs()
public void setEpochs(int m_epochs)
@OptionMetadata(displayName="workers", description="The maximum number of concurrent threads available for training.", commandLineParamName="workers", commandLineParamSynopsis="-workers <int>", displayOrder=12) public int getWorkers()
public void setWorkers(int m_workers)
@OptionMetadata(displayName="seed", description="The random number seed to be used. (default = 1).", commandLineParamName="seed", commandLineParamSynopsis="-seed <int>", displayOrder=13) public int getSeed()
public void setSeed(int m_seed)
@OptionMetadata(displayName="embedding_prefix", description="The prefix for each embedding attribute. Default: \"embedding-\".", commandLineParamName="embedding_prefix", commandLineParamSynopsis="-embedding_prefix <String>", displayOrder=14) public java.lang.String getEmbedding_prefix()
public void setEmbedding_prefix(java.lang.String embeddingPrefix)