001/* 002 * Copyright (c) 2015-2020, Oracle and/or its affiliates. All rights reserved. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017package org.tribuo.regression.rtree.impl; 018 019import com.oracle.labs.mlrg.olcut.util.Pair; 020import org.tribuo.common.tree.impl.IntArrayContainer; 021 022import java.util.Arrays; 023 024/** 025 * Internal datastructure for implementing a decision tree. 026 * <p> 027 * Represents a single value and feature tuple, with associated arrays for 028 * the indicies where that combination occurs. 029 * <p> 030 * Indices and values must be inserted in sorted ascending order or everything will break. 031 * This code does not check that this invariant is maintained. 032 * <p> 033 * Note: this class has a natural ordering that is inconsistent with equals. 034 */ 035public class InvertedFeature implements Comparable<InvertedFeature> { 036 037 private static final int DEFAULT_SIZE = 8; 038 039 /** 040 * The feature value of this object. 041 */ 042 public final double value; 043 044 /** 045 * Indices must be inserted in a sorted order. 046 */ 047 private int[] indices = null; 048 private int curSize = -1; 049 050 /** 051 * This is a short circuit in case there is a single index in this feature. 052 */ 053 private int index; 054 055 /** 056 * Constructs an inverted feature for the specified value which occurs at the specified indices. 057 * @param value The value. 058 * @param indices The indices where the value occurs. 059 */ 060 public InvertedFeature(double value, int[] indices) { 061 this.value = value; 062 this.indices = indices; 063 this.curSize = indices.length; 064 } 065 066 /** 067 * Constructs an inverted feature for the specifed value which occurs at a single index. 068 * @param value The value. 069 * @param index The index where the value occurs. 070 */ 071 public InvertedFeature(double value, int index) { 072 this.value = value; 073 this.index = index; 074 } 075 076 private InvertedFeature(InvertedFeature other) { 077 this.value = other.value; 078 this.curSize = other.curSize; 079 this.index = other.index; 080 if (other.indices != null) { 081 this.indices = Arrays.copyOf(other.indices,other.indices.length); 082 } else { 083 this.indices = null; 084 } 085 } 086 087 /** 088 * Adds an index where the feature value occurs. 089 * @param index The index. 090 */ 091 public void add(int index) { 092 if (indices == null) { 093 initArrays(); 094 } 095 append(index); 096 } 097 098 private void append(int index) { 099 if (curSize == indices.length) { 100 int newSize = indices.length + (indices.length >> 1); 101 indices = Arrays.copyOf(indices,newSize); 102 } 103 indices[curSize] = index; 104 curSize++; 105 } 106 107 /** 108 * Gets the indices where this feature value occurs. 109 * @return The indices. 110 */ 111 public int[] indices() { 112 if (indices != null) { 113 return indices; 114 } else { 115 int[] ret = new int[1]; 116 ret[0] = index; 117 return ret; 118 } 119 } 120 121 /** 122 * Fixes the size of the backing array. 123 * <p> 124 * Used when all the feature values have been observed. 125 */ 126 public void fixSize() { 127 if (indices != null) { 128 indices = Arrays.copyOf(indices, curSize); 129 } 130 } 131 132 /** 133 * Relies upon allLeftIndices being sorted in ascending order. Undefined when it's not. 134 * @param allLeftIndices The indices of the left branch. 135 * @param buffer The buffer to write out the unused indices to. 136 * @return A pair, with the first element the left branch and the second element the right branch. 137 */ 138 public Pair<InvertedFeature,InvertedFeature> split(IntArrayContainer allLeftIndices, IntArrayContainer buffer) { 139 int[] allLeftArray = allLeftIndices.array; 140 int allLeftSize = allLeftIndices.size; 141 int[] bufferArray = buffer.array; 142 if (indices != null) { 143 // These are init'd to indices.length as allLeftIndices may contain indices not in this InvertedFeature. 144 int[] leftIndices = new int[indices.length]; 145 int leftSize = 0; 146 int[] rightIndices = new int[indices.length]; 147 int rightSize = 0; 148 149 int bufferIdx = 0; 150 int curIndex = 0; 151 int j = 0; 152 for (int i = 0; i < curSize; i++) { 153 //relying on the shortcut evaluation so we don't pop out of allLeftArray 154 while ((j < allLeftSize) && ((curIndex = allLeftArray[j]) < indices[i])) { 155 bufferArray[bufferIdx] = curIndex; 156 bufferIdx++; 157 j++; 158 } 159 if ((j < allLeftSize) && (allLeftArray[j] == indices[i])) { 160 //in the left indices, put in left array 161 leftIndices[leftSize] = indices[i]; 162 leftSize++; 163 j++; // consume the value in allLeftIndices[j] 164 } else { 165 //allLeftIndices[j] now greater than indices[i], so must not include it 166 //put in right array. 167 rightIndices[rightSize] = indices[i]; 168 rightSize++; 169 } 170 } 171 172 if (j < allLeftSize) { 173 System.arraycopy(allLeftArray, j, bufferArray, bufferIdx, allLeftSize - j); 174 } 175 buffer.size = bufferIdx + (allLeftSize - j); 176 allLeftIndices.size = 0; 177 178 InvertedFeature left, right; 179 if (leftSize == 0) { 180 left = null; 181 } else if (leftSize == 1) { 182 left = new InvertedFeature(value,leftIndices[0]); 183 } else { 184 left = new InvertedFeature(value, Arrays.copyOf(leftIndices, leftSize)); 185 } 186 if (rightSize == 0) { 187 right = null; 188 } else if (rightSize == 1) { 189 right = new InvertedFeature(value,rightIndices[0]); 190 } else { 191 right = new InvertedFeature(value, Arrays.copyOf(rightIndices, rightSize)); 192 } 193 return new Pair<>(left,right); 194 } else { 195 //In this case this inverted feature only holds one value, so check for it in left indices 196 boolean found = false; 197 int i = 0; 198 while (!found && i < allLeftSize) { 199 if (allLeftArray[i] == index) { 200 found = true; 201 } else { 202 i++; 203 } 204 } 205 if (found) { 206 System.arraycopy(allLeftArray,0,bufferArray,0,i); 207 i++; 208 while (i < allLeftSize) { 209 bufferArray[i-1] = allLeftArray[i]; 210 i++; 211 } 212 if (i < allLeftSize-1) { 213 System.arraycopy(allLeftArray, i + 1, bufferArray, i, allLeftSize - i); 214 } 215 buffer.size = allLeftSize-1; 216 allLeftIndices.size = 0; 217 return new Pair<>(new InvertedFeature(value,index),null); 218 } else { 219 allLeftIndices.array = bufferArray; 220 allLeftIndices.size = 0; 221 buffer.array = allLeftArray; 222 buffer.size = allLeftSize; 223 return new Pair<>(null,new InvertedFeature(value,index)); 224 } 225 } 226 } 227 228 private void initArrays() { 229 indices = new int[DEFAULT_SIZE]; 230 indices[0] = index; 231 curSize = 1; 232 } 233 234 @Override 235 public int compareTo(InvertedFeature o) { 236 return Double.compare(value, o.value); 237 } 238 239 @Override 240 public String toString() { 241 if (indices != null) { 242 return "InvertedFeature(value=" + value + ",size=" + curSize + ",indices=" + Arrays.toString(indices) + ")"; 243 } else { 244 return "InvertedFeature(value=" + value + ",size=" + curSize + ",index=" + index + ")"; 245 } 246 } 247 248 /** 249 * Copies this inverted feature. 250 * @return A copy of this feature. 251 */ 252 public InvertedFeature deepCopy() { 253 return new InvertedFeature(this); 254 } 255}