ARTICLE AD BOX
I have labeled text data X (2-D here, but potentially n-dimensional) and floating-point data Y (1-D). I was thinking of using an SVR-type model to estimate, from a set of observed (X, Y) pairs, the vector X that minimizes Y. Here's the code I tried, but it doesn't converge to the solution as I expected. How can I do this?
import itertools
import random

import numpy as np
from scipy.optimize import basinhopping, minimize
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

# The script draws from both `random` and `np.random`; seed both so that a
# run can be reproduced.
random.seed(6)
np.random.seed(6)

# --- Space and functions ---
numbers = {str(i): i for i in range(100)}


def mul(a: str, b: str) -> int:
    """Return the product of the integers named by the labels *a* and *b*."""
    return numbers[a] * numbers[b]


def heuristic(target: int, value: int) -> int:
    """Return the absolute difference between *target* and *value*."""
    return abs(target - value)


space = list(itertools.product(numbers.keys(), numbers.keys()))
# NOTE(review): one-hot columns carry no ordering or similarity between
# labels, so the surrogate may have little basis for ranking labels it has
# never observed -- worth confirming whether this explains the poor
# convergence before tuning anything else.
encoder = OneHotEncoder(sparse_output=False)
encoder.fit(space)
X_space_enc = encoder.transform(space)  # Encoding of the entire space

# --- Parameters ---
x_target = ("8", "7")
target = mul(*x_target)
n_init = 10
n_iter = 10
top_k = 3

# --- Initial sampling ---
# `random.sample` already returns distinct points; filter with a list
# comprehension (not a set difference) so the order does not depend on
# per-process string hashing.
observed_x = [x for x in random.sample(space, n_init) if x != x_target]
observed_y = [heuristic(target, mul(*x)) for x in observed_x]


def find_candidates_by_inversion(model, obs_x, n_candidates=top_k):
    """Return the unobserved points whose predicted Y is closest to 0.

    The model is evaluated on every point of ``space`` that is not in
    *obs_x*, so no search approximation is involved; this is only practical
    because the space is finite and small.

    Args:
        model: Fitted estimator exposing ``predict`` on encoded points.
        obs_x: Already-evaluated points (tuples of labels).
        n_candidates: Maximum number of points to return.

    Returns:
        Up to *n_candidates* points ordered by ascending ``|y_pred|``, or an
        empty list when every point of the space has been observed.
    """
    observed = set(obs_x)  # O(1) membership instead of scanning the list
    unobserved = [x for x in space if x not in observed]
    if not unobserved:
        return []

    y_pred = model.predict(encoder.transform(unobserved))
    # Sort by ascending |y_pred - 0|
    order = np.argsort(np.abs(y_pred))
    return [unobserved[i] for i in order[:n_candidates]]


def find_candidates_by_continuous_inversion(model, obs_x, n_restarts=10, n_candidates=top_k):
    """Return candidates found by optimising in the continuous encoded space.

    For each restart, the squared prediction is minimised with L-BFGS-B
    inside the ``[0, 1]`` box, and the optimum is projected onto the nearest
    (Euclidean distance) discrete point that is neither in *obs_x* nor
    already found. Useful when the space is too large to enumerate.

    Args:
        model: Fitted estimator exposing ``predict`` on encoded points.
        obs_x: Already-evaluated points (tuples of labels).
        n_restarts: Number of random starting points.
        n_candidates: Maximum number of points to return.

    Returns:
        Up to *n_candidates* distinct points ordered by ascending
        ``|y_pred|``, or an empty list when nothing new could be found.
    """
    bounds = [(0, 1)] * X_space_enc.shape[1]
    excluded = set(obs_x)  # observed points plus candidates found so far
    found = []

    def squared_prediction(x):
        return model.predict(x.reshape(1, -1))[0] ** 2

    for _ in range(n_restarts):
        # Starting point: a random point of the encoded space, perturbed
        x0 = X_space_enc[random.randint(0, len(X_space_enc) - 1)].copy()
        x0 += np.random.normal(0, 0.1, size=x0.shape)
        # The noise can push the start outside the box; keep it within the
        # optimiser's bounds.
        x0 = np.clip(x0, 0, 1)

        res = minimize(
            fun=squared_prediction,
            x0=x0,
            method="L-BFGS-B",
            bounds=bounds,
        )

        # Projection: nearest discrete point in the encoded space
        distances = np.linalg.norm(X_space_enc - res.x, axis=1)
        for idx in np.argsort(distances):
            candidate = space[idx]
            if candidate not in excluded:
                excluded.add(candidate)
                found.append(candidate)
                break

    # Return the top_k unique sorted by predicted Y
    if not found:
        return []
    y_pred = model.predict(encoder.transform(found))
    order = np.argsort(np.abs(y_pred))
    return [found[i] for i in order[:n_candidates]]


def run_optimization(use_continuous_inversion=False):
    """Run the surrogate-guided search and return everything observed.

    Each iteration refits a scaler + RBF SVR pipeline on the points observed
    so far, asks the selected inversion routine for new candidates, evaluates
    them with ``heuristic`` and prints the best point seen. The loop stops
    early when the heuristic reaches 0 or no candidate is left.

    Args:
        use_continuous_inversion: When true, use
            ``find_candidates_by_continuous_inversion``; otherwise use
            ``find_candidates_by_inversion``.

    Returns:
        A ``(obs_x, obs_y)`` tuple of the observed points and their
        heuristic values, including the initial sample.
    """
    # Work on copies so both runs start from the same initial sample.
    obs_x = list(observed_x)
    obs_y = list(observed_y)
    propose = (
        find_candidates_by_continuous_inversion
        if use_continuous_inversion
        else find_candidates_by_inversion
    )

    for iteration in range(n_iter):
        X_enc = encoder.transform(obs_x)
        Y = np.array(obs_y, dtype=float)

        model = Pipeline([
            ("scaler", StandardScaler()),
            ("svr", SVR(kernel="rbf", C=10, epsilon=0.1)),
        ])
        model.fit(X_enc, Y)

        # --- Inversion ---
        best_candidates = propose(model, obs_x)
        if not best_candidates:
            print("The entire space was explored.")
            break

        for x in best_candidates:
            obs_x.append(x)
            obs_y.append(heuristic(target, mul(*x)))

        best_idx = np.argmin(obs_y)
        best_x = obs_x[best_idx]
        best_y = obs_y[best_idx]
        print(f"[Iter {iteration+1:2d}] Best: x={best_x}, "
              f"mul={mul(*best_x)}, heuristic={best_y}, "
              f"candidates={best_candidates}")

        if best_y == 0:
            print(f"\nConvergence : {best_x[0]} × {best_x[1]} = {target}")
            break

    return obs_x, obs_y


print("=== Discrete inversion (finite space) ===")
run_optimization(use_continuous_inversion=False)
print("\n=== Continuous inversion + projection (widespace) ===")
run_optimization(use_continuous_inversion=True)