001/*
002 * Copyright (c) 2015-2020, Oracle and/or its affiliates. All rights reserved.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package org.tribuo.regression.rtree.impl;
018
019import com.oracle.labs.mlrg.olcut.util.Pair;
020import org.tribuo.common.tree.impl.IntArrayContainer;
021
022import java.util.Arrays;
023
024/**
025 * Internal datastructure for implementing a decision tree.
026 * <p>
027 * Represents a single value and feature tuple, with associated arrays for
028 * the indicies where that combination occurs.
029 * <p>
030 * Indices and values must be inserted in sorted ascending order or everything will break.
031 * This code does not check that this invariant is maintained.
032 * <p>
033 * Note: this class has a natural ordering that is inconsistent with equals.
034 */
035public class InvertedFeature implements Comparable<InvertedFeature> {
036
037    private static final int DEFAULT_SIZE = 8;
038
039    /**
040     * The feature value of this object.
041     */
042    public final double value;
043
044    /**
045     * Indices must be inserted in a sorted order.
046     */
047    private int[] indices = null;
048    private int curSize = -1;
049
050    /**
051     * This is a short circuit in case there is a single index in this feature.
052     */
053    private int index;
054
055    /**
056     * Constructs an inverted feature for the specified value which occurs at the specified indices.
057     * @param value The value.
058     * @param indices The indices where the value occurs.
059     */
060    public InvertedFeature(double value, int[] indices) {
061        this.value = value;
062        this.indices = indices;
063        this.curSize = indices.length;
064    }
065
066    /**
067     * Constructs an inverted feature for the specifed value which occurs at a single index.
068     * @param value The value.
069     * @param index The index where the value occurs.
070     */
071    public InvertedFeature(double value, int index) {
072        this.value = value;
073        this.index = index;
074    }
075
076    private InvertedFeature(InvertedFeature other) {
077        this.value = other.value;
078        this.curSize = other.curSize;
079        this.index = other.index;
080        if (other.indices != null) {
081            this.indices = Arrays.copyOf(other.indices,other.indices.length);
082        } else {
083            this.indices = null;
084        }
085    }
086
087    /**
088     * Adds an index where the feature value occurs.
089     * @param index The index.
090     */
091    public void add(int index) {
092        if (indices == null) {
093            initArrays();
094        }
095        append(index);
096    }
097
098    private void append(int index) {
099        if (curSize == indices.length) {
100            int newSize = indices.length + (indices.length >> 1);
101            indices = Arrays.copyOf(indices,newSize);
102        }
103        indices[curSize] = index;
104        curSize++;
105    }
106
107    /**
108     * Gets the indices where this feature value occurs.
109     * @return The indices.
110     */
111    public int[] indices() {
112        if (indices != null) {
113            return indices;
114        } else {
115            int[] ret = new int[1];
116            ret[0] = index;
117            return ret;
118        }
119    }
120
121    /**
122     * Fixes the size of the backing array.
123     * <p>
124     * Used when all the feature values have been observed.
125     */
126    public void fixSize() {
127        if (indices != null) {
128            indices = Arrays.copyOf(indices, curSize);
129        }
130    }
131
132    /**
133     * Relies upon allLeftIndices being sorted in ascending order. Undefined when it's not.
134     * @param allLeftIndices The indices of the left branch.
135     * @param buffer The buffer to write out the unused indices to.
136     * @return A pair, with the first element the left branch and the second element the right branch.
137     */
138    public Pair<InvertedFeature,InvertedFeature> split(IntArrayContainer allLeftIndices, IntArrayContainer buffer) {
139        int[] allLeftArray = allLeftIndices.array;
140        int allLeftSize = allLeftIndices.size;
141        int[] bufferArray = buffer.array;
142        if (indices != null) {
143            // These are init'd to indices.length as allLeftIndices may contain indices not in this InvertedFeature.
144            int[] leftIndices = new int[indices.length];
145            int leftSize = 0;
146            int[] rightIndices = new int[indices.length];
147            int rightSize = 0;
148
149            int bufferIdx = 0;
150            int curIndex = 0;
151            int j = 0;
152            for (int i = 0; i < curSize; i++) {
153                //relying on the shortcut evaluation so we don't pop out of allLeftArray
154                while ((j < allLeftSize) && ((curIndex = allLeftArray[j]) < indices[i])) {
155                    bufferArray[bufferIdx] = curIndex;
156                    bufferIdx++;
157                    j++;
158                }
159                if ((j < allLeftSize) && (allLeftArray[j] == indices[i])) {
160                    //in the left indices, put in left array
161                    leftIndices[leftSize] = indices[i];
162                    leftSize++;
163                    j++; // consume the value in allLeftIndices[j]
164                } else {
165                    //allLeftIndices[j] now greater than indices[i], so must not include it
166                    //put in right array.
167                    rightIndices[rightSize] = indices[i];
168                    rightSize++;
169                }
170            }
171
172            if (j < allLeftSize) {
173                System.arraycopy(allLeftArray, j, bufferArray, bufferIdx, allLeftSize - j);
174            }
175            buffer.size = bufferIdx + (allLeftSize - j);
176            allLeftIndices.size = 0;
177
178            InvertedFeature left, right;
179            if (leftSize == 0) {
180                left = null;
181            } else if (leftSize == 1) {
182                left = new InvertedFeature(value,leftIndices[0]);
183            } else {
184                left = new InvertedFeature(value, Arrays.copyOf(leftIndices, leftSize));
185            }
186            if (rightSize == 0) {
187                right = null;
188            } else if (rightSize == 1) {
189                right = new InvertedFeature(value,rightIndices[0]);
190            } else {
191                right = new InvertedFeature(value, Arrays.copyOf(rightIndices, rightSize));
192            }
193            return new Pair<>(left,right);
194        } else {
195            //In this case this inverted feature only holds one value, so check for it in left indices
196            boolean found = false;
197            int i = 0;
198            while (!found && i < allLeftSize) {
199                if (allLeftArray[i] == index) {
200                    found = true;
201                } else {
202                    i++;
203                }
204            }
205            if (found) {
206                System.arraycopy(allLeftArray,0,bufferArray,0,i);
207                i++;
208                while (i < allLeftSize) {
209                    bufferArray[i-1] = allLeftArray[i];
210                    i++;
211                }
212                if (i < allLeftSize-1) {
213                    System.arraycopy(allLeftArray, i + 1, bufferArray, i, allLeftSize - i);
214                }
215                buffer.size = allLeftSize-1;
216                allLeftIndices.size = 0;
217                return new Pair<>(new InvertedFeature(value,index),null);
218            } else {
219                allLeftIndices.array = bufferArray;
220                allLeftIndices.size = 0;
221                buffer.array = allLeftArray;
222                buffer.size = allLeftSize;
223                return new Pair<>(null,new InvertedFeature(value,index));
224            }
225        }
226    }
227
228    private void initArrays() {
229        indices = new int[DEFAULT_SIZE];
230        indices[0] = index;
231        curSize = 1;
232    }
233
234    @Override
235    public int compareTo(InvertedFeature o) {
236        return Double.compare(value, o.value);
237    }
238
239    @Override
240    public String toString() {
241        if (indices != null) {
242            return "InvertedFeature(value=" + value + ",size=" + curSize + ",indices=" + Arrays.toString(indices) + ")";
243        } else {
244            return "InvertedFeature(value=" + value + ",size=" + curSize + ",index=" + index + ")";
245        }
246    }
247
248    /**
249     * Copies this inverted feature.
250     * @return A copy of this feature.
251     */
252    public InvertedFeature deepCopy() {
253        return new InvertedFeature(this);
254    }
255}