[Rd] data is getting corrupted

Jeff D. Hamann jeff.hamann at forestinformatics.com
Mon Nov 29 18:27:42 CET 2004


I've been attempting to perform some analysis on a model that was
interfaced with R (R calls a library that takes SEXPs and converts the
data frames into the internal structures of data), and I notice that for
small data.frames the vectors don't get corrupt (n<200-ish). When I pass
in larger data.frames, the vectors will become corrupt. I've been
PROTECTING the heck out of everything (as best as I can from the examples)
to make sure that something is not overlooked. I know the code in my
library works fine becuase when I attempt to do the same thing (with much
larger data arrays) none of this behaviour occurs.

An example of the corruption is,

    1  1714   ARPA   0.00   0.0000   0.00   0.0000   3.64  0.000     1   
20.00   0.00   0.00     0
    1  1715   ARPA   0.00   0.0000   0.00   0.0000   3.14  0.000     1   
20.00   0.00   0.00     0
    1  1716   ARPA
97538806975312948000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.00
  0.0000   0.00   0.0000   4.68  0.000     1    20.00   0.00   0.00   
 0
    1  1717   ARPA   0.00   0.0000   0.00   0.0000   2.50  0.000     1   
20.00   0.00   0.00     0
    1  1718   ARPA   0.00   0.0000   0.00   0.0000   4.78  0.000     1   
20.00   0.00   0.00     0
    1  1719   ARPA   0.00   0.0000   0.00   0.0000   4.04  0.000     1   
20.00   0.00   0.00     0
    1  1720   ARPA   0.00   0.0000   0.00   0.0000   2.60  0.000     1   
20.00   0.00   0.00     0
    1  1721   ARPA
1141566538356936100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.00
  0.0000   0.00   0.0000   3.57  0.000     1    20.00   0.00   0.00   
 0
    1  1722   ARPA   0.00   0.0000   0.00   0.0000   2.29  0.000     1   
20.00   0.00   0.00     0


And it's the same columns that become corrupt. I wanted to make sure I'm
using PROTECT correctly (the examples in the docs don't appear very
thourough) and included a snippet below,

SEXP r_write_sample_to_file( SEXP sample_in,
			     SEXP filename )
{

   unsigned long return_code;
   struct SAMPLE_RECORD *sample_ptr;
   SEXP ans;

   PROTECT( filename = AS_CHARACTER( filename ) );
   PROTECT(ans = allocVector(INTSXP, 1));

   PROTECT( sample_in = AS_LIST( sample_in ) );
   sample_ptr = build_sample_from_sexp( sample_in );

   write_sample_to_file(
      &return_code,
      CHAR(STRING_ELT(filename, 0)),
      sample_ptr,
      N_SPECIES,
      SPECIES_PTR );

   if( return_code != CONIFERS_SUCCESS  )
   {
      Rprintf( "unable to write %s\n", CHAR(STRING_ELT(filename, 0)) );
      INTEGER(ans)[0] = -1;
      UNPROTECT(2);
      UNPROTECT( 1 );
      return ans;
   }

   INTEGER(ans)[0] = 0;
   UNPROTECT(2);
   UNPROTECT( 1 );

   return ans;

}

which calls this rather lengthy function (but I thought I should include
the entire function for completeness),


/* this function converts the sample list	*/
/* from R into the internal structure		*/
struct SAMPLE_RECORD *build_sample_from_sexp( SEXP sample )
{
   int i;

   /* plots variables */
   SEXP plot_list;
   SEXP plot_plot_sexp;
   SEXP plot_lat_sexp;
   SEXP plot_long_sexp;
   SEXP plot_elev_sexp;
   SEXP plot_slp_sexp;
   SEXP plot_asp_sexp;
   SEXP plot_h20_sexp;
   SEXP plot_map_sexp;

   /* plants variables */
   SEXP plant_list;
   SEXP plant_plot_sexp;
   SEXP plant_plant_sexp;
   SEXP plant_sp_code_sexp;
   SEXP plant_d6_sexp;
   SEXP plant_d6_area_sexp;
   SEXP plant_dbh_sexp;
   SEXP plant_basal_area_sexp;
   SEXP plant_tht_sexp;
   SEXP plant_cr_sexp;
   SEXP plant_n_stems_sexp;
   SEXP plant_expf_sexp;
   SEXP plant_crown_width_sexp;
   SEXP plant_crown_area_sexp;
   SEXP plant_user_code_sexp;

   char                    temp_sp_code[16];
   struct SAMPLE_RECORD *s_ptr;
   struct SPECIES_RECORD *sp_ptr;

   s_ptr = (struct SAMPLE_RECORD *)calloc( 1, sizeof( struct SAMPLE_RECORD
) );
/*    s_ptr = (struct SAMPLE_RECORD *)Calloc( 1,  struct SAMPLE_RECORD ); */

   /* *fill in the header info */
   strcpy( s_ptr->forest,
CHAR(STRING_ELT(get_list_element(sample,"forest"), 0)) ) ;
   strcpy( s_ptr->subunit,
CHAR(STRING_ELT(get_list_element(sample,"subunit"), 0)) );
   strcpy( s_ptr->stand_name,
CHAR(STRING_ELT(get_list_element(sample,"stand.name"), 0)) );
   strcpy( s_ptr->legal, CHAR(STRING_ELT(get_list_element(sample,"legal"),
0)) );

   s_ptr->elevation = asInteger( get_list_element( sample, "elevation" ) );
   s_ptr->acreage = asReal( get_list_element( sample, "acreage" ) );
   s_ptr->age = asInteger( get_list_element( sample, "age" ) );
   s_ptr->sampled_month = asInteger( get_list_element( sample,
"sampled.month" ) );
   s_ptr->sampled_day = asInteger( get_list_element( sample, "sampled.day"
) );
   s_ptr->sampled_year = asInteger( get_list_element( sample,
"sampled.year" ) );
   s_ptr->current_year = asInteger( get_list_element( sample,
"current.year" ) );
   s_ptr->x0 = asReal( get_list_element( sample, "x0" ) );

   if( s_ptr->age <= 0 )
   {
      s_ptr->age = 0;
   }

   if( s_ptr->sampled_month <= 0 )
   {
      s_ptr->sampled_month = 0;
   }


   /* build the plots vector */
   s_ptr->n_points = asInteger( get_list_element( sample, "n.points" ) );
  s_ptr->plots_ptr = (struct PLOT_RECORD*)calloc(
	 s_ptr->n_points, sizeof( struct PLOT_RECORD ) );

/*   s_ptr->plots_ptr = (struct PLOT_RECORD*)Calloc(  */
/* 	 s_ptr->n_points, struct PLOT_RECORD ); */

   plot_list = get_list_element( sample, "plots" );
   PROTECT( plot_list = AS_LIST( plot_list ) );
//   PROTECT( plot_list  );

   plot_plot_sexp = get_list_element( plot_list, "plot" );
   plot_lat_sexp = get_list_element( plot_list, "latitude" );
   plot_long_sexp = get_list_element( plot_list, "longitude" );
   plot_elev_sexp = get_list_element( plot_list, "elevation" );
   plot_slp_sexp = get_list_element( plot_list, "slope" );
   plot_asp_sexp = get_list_element( plot_list, "aspect" );
   plot_h20_sexp = get_list_element( plot_list, "whc" );
   plot_map_sexp = get_list_element( plot_list, "map" );

   PROTECT( plot_plot_sexp = coerceVector( plot_plot_sexp, INTSXP ) );
   PROTECT( plot_lat_sexp = coerceVector( plot_lat_sexp, REALSXP ) );
   PROTECT( plot_long_sexp = coerceVector( plot_long_sexp, REALSXP ) );
   PROTECT( plot_elev_sexp = coerceVector( plot_elev_sexp, REALSXP ) );
   PROTECT( plot_slp_sexp = coerceVector( plot_slp_sexp, REALSXP ) );
   PROTECT( plot_asp_sexp = coerceVector( plot_asp_sexp, REALSXP ) );
   PROTECT( plot_h20_sexp = coerceVector( plot_h20_sexp, REALSXP ) );
   PROTECT( plot_map_sexp = coerceVector( plot_map_sexp, REALSXP ) );

   /* assign the plot array */
   for( i = 0; i < s_ptr->n_points; i++ )
   {
      s_ptr->plots_ptr[i].plot = INTEGER( plot_plot_sexp )[i];
      s_ptr->plots_ptr[i].latitude = REAL( plot_lat_sexp )[i];
      s_ptr->plots_ptr[i].longitude = REAL( plot_long_sexp )[i];
      s_ptr->plots_ptr[i].elevation = REAL( plot_elev_sexp )[i];
      s_ptr->plots_ptr[i].slope = REAL( plot_slp_sexp )[i];
      s_ptr->plots_ptr[i].aspect = REAL( plot_asp_sexp )[i];
      s_ptr->plots_ptr[i].water_capacity = REAL( plot_h20_sexp )[i];
      s_ptr->plots_ptr[i].mean_annual_precip = REAL( plot_map_sexp )[i];
   }

//   UNPROTECT( 8 );

   /* build the plants vector */
   s_ptr->n_plants = asInteger( get_list_element( sample, "n.plants" ) );
  s_ptr->plants_ptr = (struct PLANT_RECORD*)calloc(
     s_ptr->n_plants, sizeof( struct PLANT_RECORD ) );

/*   s_ptr->plants_ptr = (struct PLANT_RECORD*)Calloc(  */
/*      s_ptr->n_plants, struct PLANT_RECORD ); */


   /* build the plots vector */
  plant_list = get_list_element( sample, "plants" );
   PROTECT( plant_list = AS_LIST( plant_list ) );
//  PROTECT( plant_list );

   plant_plot_sexp = get_list_element( plant_list, "plot" );
   plant_plant_sexp = get_list_element( plant_list, "plant" );
   plant_sp_code_sexp = get_list_element( plant_list, "sp.code" );
   plant_d6_sexp = get_list_element( plant_list, "d6" );
   plant_d6_area_sexp = get_list_element( plant_list, "d6.area" );
   plant_dbh_sexp = get_list_element( plant_list, "dbh" );
   plant_basal_area_sexp = get_list_element( plant_list, "basal.area" );
   plant_tht_sexp = get_list_element( plant_list, "tht" );
   plant_cr_sexp = get_list_element( plant_list, "cr" );
   plant_n_stems_sexp = get_list_element( plant_list, "n.stems" );
   plant_expf_sexp = get_list_element( plant_list, "expf" );
   plant_crown_width_sexp = get_list_element( plant_list, "crown.width" );
   plant_crown_area_sexp = get_list_element( plant_list, "crown.area" );
   plant_user_code_sexp = get_list_element( plant_list, "user.code" );


   /* read the plants */
   PROTECT( plant_plot_sexp = coerceVector( plant_plot_sexp, INTSXP ) );
   PROTECT( plant_plant_sexp = coerceVector( plant_plant_sexp, INTSXP ) );
   PROTECT( plant_sp_code_sexp = coerceVector( plant_sp_code_sexp, STRSXP
) );
   PROTECT( plant_d6_sexp = coerceVector( plant_d6_sexp, REALSXP ) );
   PROTECT( plant_d6_area_sexp = coerceVector( plant_d6_area_sexp, REALSXP
) );
   PROTECT( plant_dbh_sexp = coerceVector( plant_dbh_sexp, REALSXP ) );
   PROTECT( plant_basal_area_sexp = coerceVector( plant_basal_area_sexp,
REALSXP ) );
   PROTECT( plant_tht_sexp = coerceVector( plant_tht_sexp, REALSXP ) );
   PROTECT( plant_cr_sexp = coerceVector( plant_cr_sexp, REALSXP ) );
   PROTECT( plant_n_stems_sexp = coerceVector( plant_n_stems_sexp, INTSXP
) );
   PROTECT( plant_expf_sexp = coerceVector( plant_expf_sexp, REALSXP ) );
   PROTECT( plant_crown_width_sexp = coerceVector( plant_crown_width_sexp,
REALSXP ) );
   PROTECT( plant_crown_area_sexp = coerceVector( plant_crown_area_sexp,
REALSXP ) );
   PROTECT( plant_user_code_sexp = coerceVector( plant_user_code_sexp,
INTSXP ) );

    /* sort the species codes based on sp_code */
    qsort(  (void*)SPECIES_PTR,
            (size_t)(N_SPECIES),
            sizeof( struct SPECIES_RECORD ),
	        compare_species_by_sp_code );

   /* assign the plot array */
   for( i = 0; i < s_ptr->n_plants; i++ )
   {
      s_ptr->plants_ptr[i].plot = INTEGER( plant_plot_sexp )[i];
      s_ptr->plants_ptr[i].plant = INTEGER( plant_plant_sexp )[i];
      strcpy( temp_sp_code, CHAR( STRING_ELT( plant_sp_code_sexp, i ) ) );

      /* get the species code and look up the correct index */
      sp_ptr = get_species_entry_from_code(    N_SPECIES,
					       SPECIES_PTR,
					       temp_sp_code );
      if( !sp_ptr )
      {
	 Rprintf( "couldn't find the species code for %s, %s\n",
		  temp_sp_code, CHAR( STRING_ELT( plant_sp_code_sexp, i ) ) );
	 continue;
      }

      /* this is the index of the "unsorted" array */
      s_ptr->plants_ptr[i].sp_idx = sp_ptr->idx;
      s_ptr->plants_ptr[i].d6 = REAL( plant_d6_sexp )[i];
      s_ptr->plants_ptr[i].d6_area = REAL( plant_d6_area_sexp )[i];
      s_ptr->plants_ptr[i].dbh = REAL( plant_dbh_sexp )[i];
      s_ptr->plants_ptr[i].basal_area = REAL( plant_basal_area_sexp )[i];
      s_ptr->plants_ptr[i].tht = REAL( plant_tht_sexp )[i];
      s_ptr->plants_ptr[i].cr = REAL( plant_cr_sexp )[i];
      s_ptr->plants_ptr[i].n_stems = INTEGER( plant_n_stems_sexp )[i];
      s_ptr->plants_ptr[i].expf = REAL( plant_expf_sexp )[i];
      s_ptr->plants_ptr[i].crown_width = REAL( plant_crown_width_sexp )[i];
      s_ptr->plants_ptr[i].crown_area = REAL( plant_crown_area_sexp )[i];
      s_ptr->plants_ptr[i].user_code = INTEGER( plant_user_code_sexp )[i];

      /* Rprintf( "dbh = %lf\n", s_ptr->plants_ptr[i].dbh );  */
      /* perform some basic error checking here */
      /* see if you can use the ISNAN macro here */

      /* try the isnan macro */
/*       if( ISNAN( REAL( plant_d6_sexp )[i] ) || s_ptr->plants_ptr[i].d6
< 0.0 ) */
/*       { */
/* 	 s_ptr->plants_ptr[i].d6 = 0.0; */
/*       } */

      if( ISNA( REAL( plant_d6_sexp )[i] ) ||
	  ISNAN( REAL( plant_d6_sexp )[i] )  ||
	  s_ptr->plants_ptr[i].d6 < 0.0 )
      {
	 s_ptr->plants_ptr[i].d6 = 0.0;
      }

      if( ISNA( REAL( plant_dbh_sexp )[i] ) ||
	  ISNAN( REAL( plant_dbh_sexp )[i] )  ||
	  s_ptr->plants_ptr[i].dbh < 0.0 )
      {
	 s_ptr->plants_ptr[i].dbh = 0.0;
      }

      if( ISNAN( REAL( plant_tht_sexp )[i] )  || s_ptr->plants_ptr[i].expf
< 0.0 )
      {
	 s_ptr->plants_ptr[i].tht = 0.0;
      }

      if( ISNAN( REAL( plant_cr_sexp )[i] )  || s_ptr->plants_ptr[i].cr <
0.0 )
      {
	 s_ptr->plants_ptr[i].cr = 0.0;
      }

      if( ISNAN( REAL( plant_expf_sexp )[i] )  ||
s_ptr->plants_ptr[i].expf < 0.0 )
      {
	 s_ptr->plants_ptr[i].expf = 0.0;
      }

      if( ISNAN( REAL( plant_crown_width_sexp )[i] )  ||
s_ptr->plants_ptr[i].crown_width < 0.0 )
      {
	 s_ptr->plants_ptr[i].crown_width = 0.0;
      }

      if( ISNAN( REAL( plant_crown_area_sexp )[i] )  ||
s_ptr->plants_ptr[i].crown_area < 0.0 )
      {
	 s_ptr->plants_ptr[i].crown_area = 0.0;
      }

/*       s_ptr->plants_ptr[i].crown_area = REAL( plant_crown_area_sexp
)[i]; */

   }

   /* now sort the species back to the "native" order (by index) */
   qsort(  (void*)SPECIES_PTR,
	   (size_t)(N_SPECIES),
	   sizeof( struct SPECIES_RECORD ),
	   compare_species_by_idx );

   UNPROTECT( 8 );   /* plot lists */
   UNPROTECT( 14 );

   UNPROTECT( 1 ); /* plot_list */
   UNPROTECT( 1 );  /* plant list */


   return s_ptr;

}

I appolgise for the long email, but I'd rather appolgize than ask permission.

I'm sure there's something I don't understand about the PROTECT/UNPROTECT
sequence as this seems to work on smaller data.frames


Thanks,
Jeff.



-- 
Jeff D. Hamann
Forest Informatics, Inc.
PO Box 1421
Corvallis, Oregon 97339-1421
phone 541-754-1428
fax 541-752-0288
jeff.hamann at forestinformatics.com
http://www.forestinformatics.com



More information about the R-devel mailing list