Rcpp code crashes R (RStudio) - unable to debug due to session termination

2 weeks ago 13
ARTICLE AD BOX

I have (although painstakingly) with the help of some coding programs, re-written one of my R functions in c++ to test if there's any noticeable improvement in speed and memory allocation. After debugging some initial issues, the issues I'm running into now is RStudio encountering a fatal error and crashing when I run the wrapper function (run_in_par_cpp3) below:

Rcpp::sourceCpp("optimize_cpp3.cpp") results <- run_in_par_cpp3( preds_combined = preds_combined, iters = iters, wide_data = wide_data, grid_row = 1, topn = 1, topn_order = T ) > R Session Aborted > R encountered a fatal error. The session was terminated.

It's unclear to me why this is happening given my basically non-existent C++ knowledge and no other message in RStudio besides the fatal error notification. It's also been difficult to discern from similar posts on SO given the uniqueness of each previously asked question(s).

Initially, I thought it was a memory error, however, even with this very small reproducible example, I get a fatal error. I have included all the code necessary to reproduce an example of what I'm working with. What can I try next?

Reprex R code with necessary data to reproduce:

wide_data <- list(Date = structure(19655, class = "Date"), TeamNumber = 1:10, X1 = c(198L, 853L, 853L, 103L, 601L, 763L, 853L, 853L, 223L, 858L), X2 = c(189L, 560L, 257L, 763L, 589L, 539L, 428L, 763L, 145L, 65L), X3 = c(116L, 797L, 411L, 223L, 95L, 238L, 145L, 705L, 773L, 853L), X4 = c(17L, 17L, 66L, 150L, 145L, 95L, 150L, 66L, 116L, 303L), X5 = c(705L, 553L, 433L, 224L, 898L, 695L, 85L, 95L, 224L, 224L), X6 = c(553L, 662L, 87L, 901L, 284L, 676L, 374L, 901L, 379L, 549L), X7 = c(695L, 399L, 424L, 276L, 902L, 276L, 399L, 764L, 902L, 696L), X8 = c(107L, 703L, 107L, 481L, 695L, 764L, 888L, 888L, 197L, 428L), X9 = c(888L, 144L, 144L, 399L, 207L, 177L, 60L, 276L, 276L, 655L), Actual = c(270, 247.4, 251.1, 287.4, 247, 269.7, 333.4, 310.7, 290.7, 299.9 ), amtWon = c(0, 0, 0, 0, 0, 0, 16, 10, 8, 9), buyin = 5, perc_rk = c(0.334641569883861, 0.0974569483380056, 0.12384861834201, 0.585402482979576, 0.094373247897477, 0.33015618742491, 0.974409291149379, 0.858770524629555, 0.631517821385663, 0.749118942731278)) preds_combined <- structure( list( Date = structure(c(19655, 19655, 19655, 19655, 19655, 19655, 19655, 19655, 19655, 19655, 19656, 19656, 19656, 19656, 19656, 19656, 19656, 19656, 19656, 19656), class = c("IDate","Date")), Player = c(113L, 126L, 703L, 313L, 505L, 393L, 66L, 902L, 513L, 648L, 72L, 245L, 492L, 514L, 162L, 748L, 438L, 142L, 700L, 173L), X1 = c(28.44, 18.39, 19.14, 18.77, 28.44, 18.39, 19.59, 38.59, 18.77, 38.59, 18.77, 18.77, 38.59, 19.14, 28.9, 18.39, 48.36, 18.77, 18.39, 38.99), X2 = c(27.94, 17.9, 19.33, 18.62, 27.94, 17.9, 20.19, 38.71, 18.62, 38.32, 18.62, 18.62, 38.32, 19.33, 28.8, 17.9, 48.18, 18.96, 17.9, 39.11), X3 = c(27.48, 17.49, 19.62, 18.55, 27.48, 17.49, 20.91, 38.83, 18.21, 38.12, 18.55, 18.55, 38.12, 19.62, 28.73, 17.49, 48.46, 18.89, 17.49, 39.24), X4 = c(26.82, 17.04, 19.71, 18.64, 26.82, 17.29, 21.37, 39.03, 18, 37.96, 18.64, 18.34, 37.96, 19.71, 28.49, 17.04, 48.8, 18.98, 17.04, 39.44), X5 = c(26.33, 16.72, 19.68, 18.62, 26.33, 16.97, 22.05, 38.91, 17.68, 37.84, 18.62, 18.02, 37.84, 19.68, 28.38, 16.72, 49.08, 18.96, 16.72, 39.67), X6 = c(26.18, 16.4, 19.91, 18.57, 25.79, 16.65, 22.27, 39.07, 17.63, 37.64, 18.57, 17.97, 37.64, 19.91, 28.53, 16.4, 49.3, 19.18, 16.4, 39.83), X7 = c(25.97, 16.24, 19.97, 18.63, 25.58, 16.49, 22.62, 39.35, 17.47, 37.36, 18.4, 17.81, 37.36, 19.97, 28.32, 16.24, 49.38, 19.02, 16.04, 39.82), X8 = c(25.69, 15.95, 19.9, 18.55, 25.3, 16.2, 23.12, 39.5, 17.4, 37.09, 18.33, 17.74, 37.09, 19.9, 28.28, 15.95, 49.49, 18.94, 15.75, 39.98), X9 = c(25.54, 15.83, 19.96, 18.62, 25.15, 15.92, 23.45, 39.41, 17.27, 37, 18.4, 17.61, 37, 19.96, 28.45, 15.83, 49.28, 19.01, 15.63, 40.31), X10 = c(25.29, 15.61, 20.15, 18.6, 24.9, 15.71, 23.64, 39.43, 17.24, 37.02, 18.37, 17.59, 36.75, 20.15, 28.43, 15.61, 49.09, 19.2, 15.41, 40.33)), row.names = c(NA, -20L), class = c("tbl_df","tbl", "data.frame") ) iters <- paste0("X",1:10)

Reprex C++ code saved locally as optimize_cpp3.cpp:

#include <Rcpp.h> #include <vector> #include <string> #include <unordered_map> #include <algorithm> using namespace Rcpp; // store scores and lineup index struct ScoreIndex { double score; int index; }; // for sorting bool compareScoreDesc(const ScoreIndex& a, const ScoreIndex& b) { return a.score > b.score; } bool compareScoreAsc(const ScoreIndex& a, const ScoreIndex& b) { return a.score < b.score; } // [[Rcpp::export]] DataFrame optimize_lineups_cpp(CharacterMatrix lineups, // needs to be a matrix DataFrame preds, // preds CharacterVector iters, // predictions column names from preds df NumericVector actuals, // wide_data$Actual NumericVector won, // wide_data$amtWon NumericVector perc, // wide_data$perc_rk NumericVector buyin, // wide_data$buyin String date_of_interest, int grid_row, // identifier to ID xgb grid row int topn, // how many to keep bool topn_order) { // TRUE = order preds in descending order // set up containers int n_iters = iters.size(); int total_rows = n_iters * topn; int n_lineups = lineups.nrow(); int n_players = preds.nrow(); // preallocate output vectors CharacterVector out_date(total_rows); NumericVector out_actual(total_rows); NumericVector out_won(total_rows); NumericVector out_perc(total_rows); NumericVector out_buyin(total_rows); CharacterVector out_iter(total_rows); IntegerVector out_grid_row(total_rows, grid_row); // fill with constant // extract player IDs once. CharacterVector player_ids = preds["Player"]; int out_idx = 0; // tracks position in output vectors // loop over Iters (these are the X1:Xn columns in preds_combined df) for(int z = 0; z < n_iters; ++z) { std::string iter_name = as<std::string>(iters[z]); // access the specific preds column NumericVector current_preds = preds[iter_name]; // fast kookup map (player name -> proj score) std::unordered_map<std::string, double> projection_map; projection_map.reserve(n_players); for(int p = 0; p < n_players; ++p) { std::string pid = as<std::string>(player_ids[p]); projection_map[pid] = current_preds[p]; } // now score it lneup std::vector<ScoreIndex> scores(n_lineups); for(int i = 0; i < n_lineups; ++i) { double current_sum = 0.0; // loop through the 9 cols of the lineup matrix for(int j = 0; j < 9; ++j) { std::string player_name = as<std::string>(lineups(i, j)); // find player in map. if missing, add 0.0 auto it = projection_map.find(player_name); if (it != projection_map.end()) { current_sum += it->second; } } scores[i] = {current_sum, i}; } // find topn indices if (topn_order) { std::partial_sort(scores.begin(), scores.begin() + topn, scores.end(), compareScoreDesc); } else { std::partial_sort(scores.begin(), scores.begin() + topn, scores.end(), compareScoreAsc); } // fill result vectors using the best indices found previously for(int k = 0; k < topn; ++k) { int best_idx = scores[k].index; out_date[out_idx] = date_of_interest; out_actual[out_idx] = actuals[best_idx]; out_won[out_idx] = won[best_idx]; out_perc[out_idx] = perc[best_idx]; out_buyin[out_idx] = buyin[best_idx]; out_iter[out_idx] = iter_name; out_idx++; } } // return df with only required variables return DataFrame::create( Named("Date") = out_date, Named("Actual") = out_actual, Named("amtWon") = out_won, Named("perc_rk") = out_perc, Named("buyin") = out_buyin, Named("iters") = out_iter, Named("grid_row") = out_grid_row ); }

Wrapper code to run in R:

run_in_par_cpp3 <- function(preds_combined, iters, wide_data, grid_row, topn, topn_order) { ## ensures L_mat is a character matrix L_mat <- as.matrix(wide_data[c("X1","X2","X3","X4","X5","X6","X7","X8","X9")]) mode(L_mat) <- "character" ## to ensure L_mat is not numeric (was causing STRSXP error previously) ## redundant but ensures iters remains character vector (was causing STRSXP error previously) iters_vec <- as.character(iters) ## filter predictions df to dat of interest current_date <- as.character(wide_data$Date[1]) preds_sub <- preds_combined[preds_combined$Date == current_date, ] # 4. Call C++ optimize_lineups_cpp( lineups = L_mat, preds = preds_sub, iters = iters_vec, actuals = as.numeric(wide_data$Actual), won = as.numeric(wide_data$amtWon), perc = as.numeric(wide_data$perc_rk), buyin = as.numeric(wide_data$buyin), date_of_interest = current_date, grid_row = as.integer(grid_row), topn = as.integer(topn), topn_order = as.logical(topn_order) ) }

Session info:

> sessionInfo() R version 4.5.1 (2025-06-13 ucrt) Platform: x86_64-w64-mingw32/x64 Running under: Windows 11 x64 (build 26200) Matrix products: default LAPACK version 3.12.1 locale: [1] LC_COLLATE=English_United States.utf8 LC_CTYPE=English_United States.utf8 LC_MONETARY=English_United States.utf8 [4] LC_NUMERIC=C LC_TIME=English_United States.utf8 time zone: America/New_York tzcode source: internal attached base packages: [1] stats graphics grDevices utils datasets methods base loaded via a namespace (and not attached): [1] compiler_4.5.1 tools_4.5.1 rstudioapi_0.17.1
Read Entire Article