OTB  9.0.0
Orfeo Toolbox
otbTrainRandomForests.hxx
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2005-2022 Centre National d'Etudes Spatiales (CNES)
3  *
4  * This file is part of Orfeo Toolbox
5  *
6  * https://www.orfeo-toolbox.org/
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  * http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  */
20 
21 #ifndef otbTrainRandomForests_hxx
22 #define otbTrainRandomForests_hxx
25 
26 namespace otb
27 {
28 namespace Wrapper
29 {
30 
31 template <class TInputValue, class TOutputValue>
32 void LearningApplicationBase<TInputValue, TOutputValue>::InitRandomForestsParams()
33 {
34  AddChoice("classifier.rf", "Random forests classifier");
35  SetParameterDescription("classifier.rf", "http://docs.opencv.org/modules/ml/doc/random_trees.html");
36  // MaxDepth
37  AddParameter(ParameterType_Int, "classifier.rf.max", "Maximum depth of the tree");
38  SetParameterInt("classifier.rf.max", 5);
39  SetParameterDescription("classifier.rf.max",
40  "The depth of the tree. A low value will likely underfit and conversely a high value will likely overfit. "
41  "The optimal value can be obtained using cross validation or other suitable methods.");
42 
43  // MinSampleCount
44  AddParameter(ParameterType_Int, "classifier.rf.min", "Minimum number of samples in each node");
45  SetParameterInt("classifier.rf.min", 10);
46  SetParameterDescription("classifier.rf.min",
47  "If the number of samples in a node is smaller than this parameter, "
48  "then the node will not be split. A reasonable value is a small percentage of the total data e.g. 1 percent.");
49 
50  // RegressionAccuracy
51  AddParameter(ParameterType_Float, "classifier.rf.ra", "Termination Criteria for regression tree");
52  SetParameterFloat("classifier.rf.ra", 0.);
53  SetParameterDescription("classifier.rf.ra",
54  "If all absolute differences between an estimated value in a node "
55  "and the values of the train samples in this node are smaller than this regression accuracy parameter, "
56  "then the node will not be split.");
57 
58  // MaxNumberOfCategories
59  AddParameter(ParameterType_Int, "classifier.rf.cat", "Cluster possible values of a categorical variable into K <= cat clusters to find a suboptimal split");
60  SetParameterInt("classifier.rf.cat", 10);
61  SetParameterDescription("classifier.rf.cat", "Cluster possible values of a categorical variable into K <= cat clusters to find a suboptimal split.");
62 
63  // Priors are not exposed.
64 
65  // CalculateVariableImportance not exposed
66 
67  // MaxNumberOfVariables
68  AddParameter(ParameterType_Int, "classifier.rf.var", "Size of the randomly selected subset of features at each tree node");
69  SetParameterInt("classifier.rf.var", 0);
70  SetParameterDescription("classifier.rf.var",
71  "The size of the subset of features, randomly selected at each tree node, that are used to find the best split(s). "
72  "If you set it to 0, then the size will be set to the square root of the total number of features.");
73 
74  // MaxNumberOfTrees
75  AddParameter(ParameterType_Int, "classifier.rf.nbtrees", "Maximum number of trees in the forest");
76  SetParameterInt("classifier.rf.nbtrees", 100);
77  SetParameterDescription("classifier.rf.nbtrees",
78  "The maximum number of trees in the forest. Typically, the more trees you have, the better the accuracy. "
79  "However, the improvement in accuracy generally diminishes and reaches an asymptote for a certain number of trees. "
80  "Also to keep in mind, increasing the number of trees increases the prediction time linearly.");
81 
82  // ForestAccuracy
83  AddParameter(ParameterType_Float, "classifier.rf.acc", "Sufficient accuracy (OOB error)");
84  SetParameterFloat("classifier.rf.acc", 0.01);
85  SetParameterDescription("classifier.rf.acc", "Sufficient accuracy (OOB error).");
86 
87 
88  // TerminationCriteria not exposed
89 }
90 
91 template <class TInputValue, class TOutputValue>
92 void LearningApplicationBase<TInputValue, TOutputValue>::TrainRandomForests(typename ListSampleType::Pointer trainingListSample,
93  typename TargetListSampleType::Pointer trainingLabeledListSample,
94  std::string modelPath)
95 {
97  typename RandomForestType::Pointer classifier = RandomForestType::New();
98  classifier->SetRegressionMode(this->m_RegressionFlag);
99  classifier->SetInputListSample(trainingListSample);
100  classifier->SetTargetListSample(trainingLabeledListSample);
101  classifier->SetMaxDepth(GetParameterInt("classifier.rf.max"));
102  classifier->SetMinSampleCount(GetParameterInt("classifier.rf.min"));
103  classifier->SetRegressionAccuracy(GetParameterFloat("classifier.rf.ra"));
104  classifier->SetMaxNumberOfCategories(GetParameterInt("classifier.rf.cat"));
105  classifier->SetMaxNumberOfVariables(GetParameterInt("classifier.rf.var"));
106  classifier->SetMaxNumberOfTrees(GetParameterInt("classifier.rf.nbtrees"));
107  classifier->SetForestAccuracy(GetParameterFloat("classifier.rf.acc"));
108 
109  classifier->Train();
110  classifier->Save(modelPath);
111 }
112 
113 } // end namespace wrapper
114 } // end namespace otb
115 
116 #endif
otbRandomForestsMachineLearningModel.h
otb
The "otb" namespace contains all Orfeo Toolbox (OTB) classes.
Definition: otbJoinContainer.h:32
otbLearningApplicationBase.h
otb::Wrapper::ParameterType_Int
@ ParameterType_Int
Definition: otbWrapperTypes.h:38
otb::Wrapper::ParameterType_Float
@ ParameterType_Float
Definition: otbWrapperTypes.h:39
otb::RandomForestsMachineLearningModel
Definition: otbRandomForestsMachineLearningModel.h:36