public abstract class Dl4jStringToWordEmbeddings
extends weka.filters.SimpleBatchFilter
| Constructor and Description |
|---|
| Dl4jStringToWordEmbeddings() |
| Modifier and Type | Method and Description |
|---|---|
| weka.filters.unsupervised.attribute.Dl4jStringToWordEmbeddings.Action | getAction() |
| weka.core.Capabilities | getCapabilities() |
| int | getConcat_words() |
| int | getConcatWords() |
| java.lang.String | getEmbedding_prefix() |
| int | getEpochs() |
| int | getIterations() |
| int | getLayerSize() |
| int | getMinWordFrequency() |
| java.lang.String[] | getOptions() |
| TokenPreProcess | getPreProcessor() |
| int | getSeed() |
| Dl4jAbstractStopwords | getStopWordsHandler() |
| int | getTextIndex() |
| TokenizerFactory | getTokenizerFactory() |
| int | getWindowSize() |
| int | getWorkers() |
| java.util.Enumeration<weka.core.Option> | listOptions() |
| void | setAction(weka.filters.unsupervised.attribute.Dl4jStringToWordEmbeddings.Action action) |
| void | setConcat_words(int m_concat_words) |
| void | setConcatWords(int concatWords) |
| void | setEmbedding_prefix(java.lang.String embeddingPrefix) |
| void | setEpochs(int m_epochs) |
| void | setIterations(int iterations) |
| void | setLayerSize(int layerSize) |
| void | setMinWordFrequency(int minWordFrequency) |
| void | setOptions(java.lang.String[] options) – Parses the options for this object. |
| void | setPreProcessor(TokenPreProcess value) – Sets the preprocessor action. |
| void | setSeed(int m_seed) |
| void | setStopWordsHandler(Dl4jAbstractStopwords m_stopWordsHandler) |
| void | setTextIndex(int textIndex) – Set the attribute's index with the string to process. |
| void | setTokenizerFactory(TokenizerFactory m_tokenizerFactory) |
| void | setWindowSize(int windowSize) |
| void | setWorkers(int m_workers) |
allowAccessToFullInputFormat, batchFinished, input, input
batchFilterFile, debugTipText, doNotCheckCapabilitiesTipText, filterFile, getCapabilities, getCopyOfInputFormat, getDebug, getDoNotCheckCapabilities, getOutputFormat, getRevision, isFirstBatchDone, isNewBatch, isOutputFormatDefined, main, makeCopies, makeCopy, mayRemoveInstanceAfterFirstBatchDone, numPendingOutput, output, outputPeek, postExecution, preExecution, run, runFilter, setDebug, setDoNotCheckCapabilities, toString, useFilter, wekaStaticWrapper
public java.util.Enumeration<weka.core.Option> listOptions()
listOptions in interface weka.core.OptionHandler
listOptions in class weka.filters.Filter
public java.lang.String[] getOptions()
getOptions in interface weka.core.OptionHandler
getOptions in class weka.filters.Filter
public void setOptions(java.lang.String[] options)
throws java.lang.Exception
setOptions in interface weka.core.OptionHandler
setOptions in class weka.filters.Filter
options - the options to use
java.lang.Exception - if setting of options fails
public weka.core.Capabilities getCapabilities()
getCapabilities in interface weka.core.CapabilitiesHandler
getCapabilities in class weka.filters.Filter
public int getConcatWords()
public void setConcatWords(int concatWords)
@OptionMetadata(displayName="action",
description="The action to perform on the embeddings: 1) report embeddings (WORD_VECTOR), 2) Average embeddings of the input string (DOC_VECTOR_AVERAGE),3) Add embeddings of the input string (DOC_VECTOR_ADD), 4) Concatenate the first *concat_words* embeddings of the input string (DOC_VECTOR_CONCAT), (default WORD_VECTOR).",
commandLineParamName="action",
commandLineParamSynopsis="-level <speficiation>",
displayOrder=1)
public weka.filters.unsupervised.attribute.Dl4jStringToWordEmbeddings.Action getAction()
public void setAction(weka.filters.unsupervised.attribute.Dl4jStringToWordEmbeddings.Action action)
@OptionMetadata(displayName="concat_words",
description="Number of stopWords (from left to right) of the tweet whose embeddings will be concatenated.This parameter only applies if action=DOC_VECTOR_CONCAT (default = 15).",
commandLineParamName="concat_words",
commandLineParamSynopsis="-concat_words <int>",
displayOrder=2)
public int getConcat_words()
public void setConcat_words(int m_concat_words)
@OptionMetadata(displayName="stopWordsHandler",
description="The stopWordsHandler. Dl4j Null means no stop stopWords are used.",
commandLineParamName="stopWordsHandler",
commandLineParamSynopsis="-stopWordsHandler <String>",
displayOrder=3)
public Dl4jAbstractStopwords getStopWordsHandler()
public void setStopWordsHandler(Dl4jAbstractStopwords m_stopWordsHandler)
@OptionMetadata(displayName="tokenizerFactory",
description="The tokenizer factory to use on the strings. Default: DefaultTokenizer.",
commandLineParamName="tokenizerFactory",
commandLineParamSynopsis="-tokenizerFactory <String>",
displayOrder=4)
public TokenizerFactory getTokenizerFactory()
public void setTokenizerFactory(TokenizerFactory m_tokenizerFactory)
@OptionMetadata(displayName="preprocessor",
description="The token Preprocessor for preprocessing the Strings. Default: CommonPreProcessor.",
commandLineParamName="preprocessor",
commandLineParamSynopsis="-preprocessor <String>",
displayOrder=5)
public TokenPreProcess getPreProcessor()
public void setPreProcessor(TokenPreProcess value)
value - the action type
@OptionMetadata(displayName="attribute string index",
description="The attribute string index (starting from 1) to process (default = 1).",
commandLineParamName="index",
commandLineParamSynopsis="-index <int>",
displayOrder=6)
public int getTextIndex()
public void setTextIndex(int textIndex)
textIndex - the index value name
@OptionMetadata(displayName="minWordFrequency",
description="The minimum word frequency (default = 5).",
commandLineParamName="minWordFrequency",
commandLineParamSynopsis="-minWordFrequency <int>",
displayOrder=7)
public int getMinWordFrequency()
public void setMinWordFrequency(int minWordFrequency)
@OptionMetadata(displayName="layerSize",
description="The size of the word vectors (default = 100).",
commandLineParamName="layerSize",
commandLineParamSynopsis="-layerSize <int>",
displayOrder=8)
public int getLayerSize()
public void setLayerSize(int layerSize)
@OptionMetadata(displayName="iterations",
description="The number of iterations (default = 1).",
commandLineParamName="iterations",
commandLineParamSynopsis="-iterations <int>",
displayOrder=9)
public int getIterations()
public void setIterations(int iterations)
@OptionMetadata(displayName="windowSize",
description="The size of the window (default = 5).",
commandLineParamName="windowSize",
commandLineParamSynopsis="-windowSize <int>",
displayOrder=10)
public int getWindowSize()
public void setWindowSize(int windowSize)
@OptionMetadata(displayName="epochs",
description="The number of epochs (iterations over whole training corpus) for training (default = 1).",
commandLineParamName="epochs",
commandLineParamSynopsis="-epochs <int>",
displayOrder=11)
public int getEpochs()
public void setEpochs(int m_epochs)
@OptionMetadata(displayName="workers",
description="The maximum number of concurrent threads available for training.",
commandLineParamName="workers",
commandLineParamSynopsis="-workers <int>",
displayOrder=12)
public int getWorkers()
public void setWorkers(int m_workers)
@OptionMetadata(displayName="seed",
description="The random number seed to be used. (default = 1).",
commandLineParamName="seed",
commandLineParamSynopsis="-seed <int>",
displayOrder=13)
public int getSeed()
public void setSeed(int m_seed)
@OptionMetadata(displayName="embedding_prefix",
description="The prefix for each embedding attribute. Default: \"embedding-\".",
commandLineParamName="embedding_prefix",
commandLineParamSynopsis="-embedding_prefix <String>",
displayOrder=14)
public java.lang.String getEmbedding_prefix()
public void setEmbedding_prefix(java.lang.String embeddingPrefix)