OTB  6.7.0
Orfeo Toolbox
otbTrainRandomForests.hxx
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2005-2019 Centre National d'Etudes Spatiales (CNES)
3  *
4  * This file is part of Orfeo Toolbox
5  *
6  * https://www.orfeo-toolbox.org/
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  * http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  */
20 
21 #ifndef otbTrainRandomForests_hxx
22 #define otbTrainRandomForests_hxx
25 
26 namespace otb
27 {
28 namespace Wrapper
29 {
30 
31 template <class TInputValue, class TOutputValue>
32 void
33 LearningApplicationBase<TInputValue,TOutputValue>
34 ::InitRandomForestsParams()
35 {
36  AddChoice("classifier.rf", "Random forests classifier");
37  SetParameterDescription("classifier.rf", "http://docs.opencv.org/modules/ml/doc/random_trees.html");
38  //MaxDepth
39  AddParameter(ParameterType_Int, "classifier.rf.max", "Maximum depth of the tree");
40  SetParameterInt("classifier.rf.max",5);
41  SetParameterDescription(
42  "classifier.rf.max",
43  "The depth of the tree. A low value will likely underfit and conversely a high value will likely overfit. "
44  "The optimal value can be obtained using cross validation or other suitable methods.");
45 
46  //MinSampleCount
47  AddParameter(ParameterType_Int, "classifier.rf.min", "Minimum number of samples in each node");
48  SetParameterInt("classifier.rf.min",10);
49  SetParameterDescription(
50  "classifier.rf.min", "If the number of samples in a node is smaller than this parameter, "
51  "then the node will not be split. A reasonable value is a small percentage of the total data e.g. 1 percent.");
52 
53  //RegressionAccuracy
54  AddParameter(ParameterType_Float, "classifier.rf.ra", "Termination Criteria for regression tree");
55  SetParameterFloat("classifier.rf.ra",0.);
56  SetParameterDescription("classifier.rf.ra", "If all absolute differences between an estimated value in a node "
57  "and the values of the train samples in this node are smaller than this regression accuracy parameter, "
58  "then the node will not be split.");
59 
60  //MaxNumberOfCategories
61  AddParameter(ParameterType_Int, "classifier.rf.cat",
62  "Cluster possible values of a categorical variable into K <= cat clusters to find a suboptimal split");
63  SetParameterInt("classifier.rf.cat",10);
64  SetParameterDescription(
65  "classifier.rf.cat",
66  "Cluster possible values of a categorical variable into K <= cat clusters to find a suboptimal split.");
67 
68  //Priors are not exposed.
69 
70  //CalculateVariableImportance not exposed
71 
72  //MaxNumberOfVariables
73  AddParameter(ParameterType_Int, "classifier.rf.var",
74  "Size of the randomly selected subset of features at each tree node");
75  SetParameterInt("classifier.rf.var",0);
76  SetParameterDescription(
77  "classifier.rf.var",
78  "The size of the subset of features, randomly selected at each tree node, that are used to find the best split(s). "
79  "If you set it to 0, then the size will be set to the square root of the total number of features.");
80 
81  //MaxNumberOfTrees
82  AddParameter(ParameterType_Int, "classifier.rf.nbtrees",
83  "Maximum number of trees in the forest");
84  SetParameterInt("classifier.rf.nbtrees",100);
85  SetParameterDescription(
86  "classifier.rf.nbtrees",
87  "The maximum number of trees in the forest. Typically, the more trees you have, the better the accuracy. "
88  "However, the improvement in accuracy generally diminishes and reaches an asymptote for a certain number of trees. "
89  "Also to keep in mind, increasing the number of trees increases the prediction time linearly.");
90 
91  //ForestAccuracy
92  AddParameter(ParameterType_Float, "classifier.rf.acc",
93  "Sufficient accuracy (OOB error)");
94  SetParameterFloat("classifier.rf.acc",0.01);
95  SetParameterDescription("classifier.rf.acc","Sufficient accuracy (OOB error).");
96 
97 
98  //TerminationCriteria not exposed
99 }
100 
101 template <class TInputValue, class TOutputValue>
102 void
103 LearningApplicationBase<TInputValue,TOutputValue>
104 ::TrainRandomForests(typename ListSampleType::Pointer trainingListSample,
105  typename TargetListSampleType::Pointer trainingLabeledListSample,
106  std::string modelPath)
107 {
109  typename RandomForestType::Pointer classifier = RandomForestType::New();
110  classifier->SetRegressionMode(this->m_RegressionFlag);
111  classifier->SetInputListSample(trainingListSample);
112  classifier->SetTargetListSample(trainingLabeledListSample);
113  classifier->SetMaxDepth(GetParameterInt("classifier.rf.max"));
114  classifier->SetMinSampleCount(GetParameterInt("classifier.rf.min"));
115  classifier->SetRegressionAccuracy(GetParameterFloat("classifier.rf.ra"));
116  classifier->SetMaxNumberOfCategories(GetParameterInt("classifier.rf.cat"));
117  classifier->SetMaxNumberOfVariables(GetParameterInt("classifier.rf.var"));
118  classifier->SetMaxNumberOfTrees(GetParameterInt("classifier.rf.nbtrees"));
119  classifier->SetForestAccuracy(GetParameterFloat("classifier.rf.acc"));
120 
121  classifier->Train();
122  classifier->Save(modelPath);
123 }
124 
125 } //end namespace wrapper
126 } //end namespace otb
127 
128 #endif