diff -Naur lammps-29Oct09/doc/Section_start.html lammps-30Oct09/doc/Section_start.html --- lammps-29Oct09/doc/Section_start.html 2009-10-29 15:37:40.000000000 -0600 +++ lammps-30Oct09/doc/Section_start.html 2009-10-29 16:04:06.000000000 -0600 @@ -311,38 +311,11 @@ files created when LAMMPS is built, for either all builds or for a particular machine.
-(3) On some machines with some compiler options, the Coulomb tabling -option that is enabled by default for "long" pair -styles such as lj/cut/coul/long and -lj/charmm/coul/long does not work. Tables are used by these styles -since it can offer a 2x speed-up. A symptom of this problem is -getting wildly large energies on timestep 0 of the examples/peptide -simulation. -
-Here are several work-arounds. Coulomb tables can be disabled by -setting "table 0" in the pair_modify command. -
-The associated files (e.g. pair_lj_cut_coul_long.cpp) can be compiled -at a lower optimization level like -O2, or with the compiler flag --fno-strict-aliasing. The latter can be done by adding something -like these lines in your Makefile.machine: -
-NOALIAS = -fno-strict-aliasing --
pair_lj_cut_coul_long.o : pair_lj_cut_coul_long.cpp - $(CC) $(CCFLAGS) $(NOALIAS) -c $< --
pair_lj_charmm_coul_long.o : pair_lj_charmm_coul_long.cpp - $(CC) $(CCFLAGS) $(NOALIAS) -c $< --
On a Macintosh, try compiling the pair "long" files without the -fast -compiler option. -
-(4) Building for a Mac. +
(3) Building for a Mac.
OS X is BSD Unix, so it should just work. See the Makefile.mac file.
-(5) Building for MicroSoft Windows. +
(4) Building for MicroSoft Windows.
The LAMMPS download page has an option to download a pre-built Windows exeutable. See below for instructions for running this executable on diff -Naur lammps-29Oct09/doc/Section_start.txt lammps-30Oct09/doc/Section_start.txt --- lammps-29Oct09/doc/Section_start.txt 2009-10-29 15:37:40.000000000 -0600 +++ lammps-30Oct09/doc/Section_start.txt 2009-10-29 16:04:06.000000000 -0600 @@ -306,38 +306,11 @@ files created when LAMMPS is built, for either all builds or for a particular machine. -(3) On some machines with some compiler options, the Coulomb tabling -option that is enabled by default for "long" "pair -styles"_pair_style.html such as {lj/cut/coul/long} and -{lj/charmm/coul/long} does not work. Tables are used by these styles -since it can offer a 2x speed-up. A symptom of this problem is -getting wildly large energies on timestep 0 of the examples/peptide -simulation. - -Here are several work-arounds. Coulomb tables can be disabled by -setting "table 0" in the "pair_modify"_pair_modify.html command. - -The associated files (e.g. pair_lj_cut_coul_long.cpp) can be compiled -at a lower optimization level like -O2, or with the compiler flag -{-fno-strict-aliasing}. The latter can be done by adding something -like these lines in your Makefile.machine: - -NOALIAS = -fno-strict-aliasing :pre - -pair_lj_cut_coul_long.o : pair_lj_cut_coul_long.cpp - $(CC) $(CCFLAGS) $(NOALIAS) -c $< :pre - -pair_lj_charmm_coul_long.o : pair_lj_charmm_coul_long.cpp - $(CC) $(CCFLAGS) $(NOALIAS) -c $< :pre - -On a Macintosh, try compiling the pair "long" files without the -fast -compiler option. - -(4) Building for a Mac. +(3) Building for a Mac. OS X is BSD Unix, so it should just work. See the Makefile.mac file. -(5) Building for MicroSoft Windows. +(4) Building for MicroSoft Windows. The LAMMPS download page has an option to download a pre-built Windows exeutable. See below for instructions for running this executable on diff -Naur lammps-29Oct09/src/KSPACE/pair_coul_long.cpp lammps-30Oct09/src/KSPACE/pair_coul_long.cpp --- lammps-29Oct09/src/KSPACE/pair_coul_long.cpp 2009-08-08 14:06:53.000000000 -0600 +++ lammps-30Oct09/src/KSPACE/pair_coul_long.cpp 2009-10-29 16:03:02.000000000 -0600 @@ -73,8 +73,7 @@ double r,r2inv,forcecoul,factor_coul; double grij,expm2,prefactor,t,erfc; int *ilist,*jlist,*numneigh,**firstneigh; - float rsq; - int *int_rsq = (int *) &rsq; + double rsq; ecoul = 0.0; if (eflag || vflag) ev_setup(eflag,vflag); @@ -118,11 +117,11 @@ dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; rsq = delx*delx + dely*dely + delz*delz; - if (rsq < cut_coulsq) { - r2inv = 1.0/rsq; - if (!ncoultablebits || rsq <= tabinnersq) { - r = sqrtf(rsq); + r2inv = 1.0/rsq; + if (!ncoultablebits || rsq <= tabinnersq) { + r = sqrt(rsq); + grij = g_ewald * r; expm2 = exp(-grij*grij); t = 1.0 / (1.0 + EWALD_P*grij); @@ -131,9 +130,11 @@ forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor; } else { - itable = *int_rsq & ncoulmask; + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + itable = rsq_lookup.i & ncoulmask; itable >>= ncoulshiftbits; - fraction = (rsq - rtable[itable]) * drtable[itable]; + fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable]; table = ftable[itable] + fraction*dftable[itable]; forcecoul = qtmp*q[j] * table; if (factor_coul < 1.0) { @@ -310,39 +311,37 @@ dptable = (double *) memory->smalloc(ntable*sizeof(double),"pair:dptable"); } - float rsq; - int *int_rsq = (int *) &rsq; - float minrsq; - int *int_minrsq = (int *) &minrsq; + union_int_float_t rsq_lookup; + union_int_float_t minrsq_lookup; int itablemin; - *int_minrsq = 0 << ncoulshiftbits; - *int_minrsq = *int_minrsq | maskhi; + minrsq_lookup.i = 0 << ncoulshiftbits; + minrsq_lookup.i |= maskhi; for (int i = 0; i < ntable; i++) { - *int_rsq = i << ncoulshiftbits; - *int_rsq = *int_rsq | masklo; - if (rsq < tabinnersq) { - *int_rsq = i << ncoulshiftbits; - *int_rsq = *int_rsq | maskhi; + rsq_lookup.i = i << ncoulshiftbits; + rsq_lookup.i |= masklo; + if (rsq_lookup.f < tabinnersq) { + rsq_lookup.i = i << ncoulshiftbits; + rsq_lookup.i |= maskhi; } - r = sqrtf(rsq); + r = sqrtf(rsq_lookup.f); grij = g_ewald * r; expm2 = exp(-grij*grij); derfc = erfc(grij); if (cut_respa == NULL) { - rtable[i] = rsq; + rtable[i] = rsq_lookup.f; ftable[i] = qqrd2e/r * (derfc + EWALD_F*grij*expm2); ctable[i] = qqrd2e/r; etable[i] = qqrd2e/r * derfc; } else { - rtable[i] = rsq; + rtable[i] = rsq_lookup.f; ftable[i] = qqrd2e/r * (derfc + EWALD_F*grij*expm2 - 1.0); ctable[i] = 0.0; etable[i] = qqrd2e/r * derfc; ptable[i] = qqrd2e/r; vtable[i] = qqrd2e/r * (derfc + EWALD_F*grij*expm2); - if (rsq > cut_respa[2]*cut_respa[2]) { - if (rsq < cut_respa[3]*cut_respa[3]) { + if (rsq_lookup.f > cut_respa[2]*cut_respa[2]) { + if (rsq_lookup.f < cut_respa[3]*cut_respa[3]) { rsw = (r - cut_respa[2])/(cut_respa[3] - cut_respa[2]); ftable[i] += qqrd2e/r * rsw*rsw*(3.0 - 2.0*rsw); ctable[i] = qqrd2e/r * rsw*rsw*(3.0 - 2.0*rsw); @@ -352,10 +351,10 @@ } } } - minrsq = MIN(minrsq,rsq); + minrsq_lookup.f = MIN(minrsq_lookup.f,rsq_lookup.f); } - tabinnersq = minrsq; + tabinnersq = minrsq_lookup.f; int ntablem1 = ntable - 1; @@ -391,18 +390,18 @@ // or ntablem1 if itablemin=0 // deltas at itablemax only needed if corresponding rsq < cut*cut // if so, compute deltas between rsq and cut*cut - + double f_tmp,c_tmp,e_tmp,p_tmp,v_tmp; - itablemin = *int_minrsq & ncoulmask; + itablemin = minrsq_lookup.i & ncoulmask; itablemin >>= ncoulshiftbits; int itablemax = itablemin - 1; if (itablemin == 0) itablemax = ntablem1; - *int_rsq = itablemax << ncoulshiftbits; - *int_rsq = *int_rsq | maskhi; + rsq_lookup.i = itablemax << ncoulshiftbits; + rsq_lookup.i |= maskhi; - if (rsq < cut_coulsq) { - rsq = cut_coulsq; - r = sqrtf(rsq); + if (rsq_lookup.f < cut_coulsq) { + rsq_lookup.f = cut_coulsq; + r = sqrtf(rsq_lookup.f); grij = g_ewald * r; expm2 = exp(-grij*grij); derfc = erfc(grij); @@ -417,8 +416,8 @@ e_tmp = qqrd2e/r * derfc; p_tmp = qqrd2e/r; v_tmp = qqrd2e/r * (derfc + EWALD_F*grij*expm2); - if (rsq > cut_respa[2]*cut_respa[2]) { - if (rsq < cut_respa[3]*cut_respa[3]) { + if (rsq_lookup.f > cut_respa[2]*cut_respa[2]) { + if (rsq_lookup.f < cut_respa[3]*cut_respa[3]) { rsw = (r - cut_respa[2])/(cut_respa[3] - cut_respa[2]); f_tmp += qqrd2e/r * rsw*rsw*(3.0 - 2.0*rsw); c_tmp = qqrd2e/r * rsw*rsw*(3.0 - 2.0*rsw); @@ -429,7 +428,7 @@ } } - drtable[itablemax] = 1.0/(rsq - rtable[itablemax]); + drtable[itablemax] = 1.0/(rsq_lookup.f - rtable[itablemax]); dftable[itablemax] = f_tmp - ftable[itablemax]; dctable[itablemax] = c_tmp - ctable[itablemax]; detable[itablemax] = e_tmp - etable[itablemax]; @@ -529,11 +528,11 @@ forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor; } else { - float rsq_single = rsq; - int *int_rsq = (int *) &rsq_single; - itable = *int_rsq & ncoulmask; + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + itable = rsq_lookup.i & ncoulmask; itable >>= ncoulshiftbits; - fraction = (rsq_single - rtable[itable]) * drtable[itable]; + fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable]; table = ftable[itable] + fraction*dftable[itable]; forcecoul = atom->q[i]*atom->q[j] * table; if (factor_coul < 1.0) { diff -Naur lammps-29Oct09/src/KSPACE/pair_lj_charmm_coul_long.cpp lammps-30Oct09/src/KSPACE/pair_lj_charmm_coul_long.cpp --- lammps-29Oct09/src/KSPACE/pair_lj_charmm_coul_long.cpp 2009-08-18 12:02:10.000000000 -0600 +++ lammps-30Oct09/src/KSPACE/pair_lj_charmm_coul_long.cpp 2009-10-29 16:03:02.000000000 -0600 @@ -90,8 +90,7 @@ double grij,expm2,prefactor,t,erfc; double philj,switch1,switch2; int *ilist,*jlist,*numneigh,**firstneigh; - float rsq; - int *int_rsq = (int *) &rsq; + double rsq; evdwl = ecoul = 0.0; if (eflag || vflag) ev_setup(eflag,vflag); @@ -145,7 +144,7 @@ if (rsq < cut_coulsq) { if (!ncoultablebits || rsq <= tabinnersq) { - r = sqrtf(rsq); + r = sqrt(rsq); grij = g_ewald * r; expm2 = exp(-grij*grij); t = 1.0 / (1.0 + EWALD_P*grij); @@ -154,9 +153,11 @@ forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor; } else { - itable = *int_rsq & ncoulmask; + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + itable = rsq_lookup.i & ncoulmask; itable >>= ncoulshiftbits; - fraction = (rsq - rtable[itable]) * drtable[itable]; + fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable]; table = ftable[itable] + fraction*dftable[itable]; forcecoul = qtmp*q[j] * table; if (factor_coul < 1.0) { @@ -431,8 +432,7 @@ double philj,switch1,switch2; double rsw; int *ilist,*jlist,*numneigh,**firstneigh; - float rsq; - int *int_rsq = (int *) &rsq; + double rsq; evdwl = ecoul = 0.0; if (eflag || vflag) ev_setup(eflag,vflag); @@ -494,7 +494,7 @@ if (rsq < cut_coulsq) { if (!ncoultablebits || rsq <= tabinnersq) { - r = sqrtf(rsq); + r = sqrt(rsq); grij = g_ewald * r; expm2 = exp(-grij*grij); t = 1.0 / (1.0 + EWALD_P*grij); @@ -515,9 +515,11 @@ } } } else { - itable = *int_rsq & ncoulmask; + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + itable = rsq_lookup.i & ncoulmask; itable >>= ncoulshiftbits; - fraction = (rsq - rtable[itable]) * drtable[itable]; + fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable]; table = ftable[itable] + fraction*dftable[itable]; forcecoul = qtmp*q[j] * table; if (factor_coul < 1.0) { @@ -540,7 +542,7 @@ forcelj = forcelj*switch1 + philj*switch2; } if (rsq < cut_in_on_sq) { - rsw = (sqrtf(rsq) - cut_in_off)/cut_in_diff; + rsw = (sqrt(rsq) - cut_in_off)/cut_in_diff; forcelj *= rsw*rsw*(3.0 - 2.0*rsw); } } else forcelj = 0.0; @@ -888,39 +890,37 @@ dptable = (double *) memory->smalloc(ntable*sizeof(double),"pair:dptable"); } - float rsq; - int *int_rsq = (int *) &rsq; - float minrsq; - int *int_minrsq = (int *) &minrsq; + union_int_float_t rsq_lookup; + union_int_float_t minrsq_lookup; int itablemin; - *int_minrsq = 0 << ncoulshiftbits; - *int_minrsq = *int_minrsq | maskhi; + minrsq_lookup.i = 0 << ncoulshiftbits; + minrsq_lookup.i |= maskhi; for (int i = 0; i < ntable; i++) { - *int_rsq = i << ncoulshiftbits; - *int_rsq = *int_rsq | masklo; - if (rsq < tabinnersq) { - *int_rsq = i << ncoulshiftbits; - *int_rsq = *int_rsq | maskhi; + rsq_lookup.i = i << ncoulshiftbits; + rsq_lookup.i |= masklo; + if (rsq_lookup.f < tabinnersq) { + rsq_lookup.i = i << ncoulshiftbits; + rsq_lookup.i |= maskhi; } - r = sqrtf(rsq); + r = sqrtf(rsq_lookup.f); grij = g_ewald * r; expm2 = exp(-grij*grij); derfc = erfc(grij); if (cut_respa == NULL) { - rtable[i] = rsq; + rtable[i] = rsq_lookup.f; ftable[i] = qqrd2e/r * (derfc + EWALD_F*grij*expm2); ctable[i] = qqrd2e/r; etable[i] = qqrd2e/r * derfc; } else { - rtable[i] = rsq; + rtable[i] = rsq_lookup.f; ftable[i] = qqrd2e/r * (derfc + EWALD_F*grij*expm2 - 1.0); ctable[i] = 0.0; etable[i] = qqrd2e/r * derfc; ptable[i] = qqrd2e/r; vtable[i] = qqrd2e/r * (derfc + EWALD_F*grij*expm2); - if (rsq > cut_respa[2]*cut_respa[2]) { - if (rsq < cut_respa[3]*cut_respa[3]) { + if (rsq_lookup.f > cut_respa[2]*cut_respa[2]) { + if (rsq_lookup.f < cut_respa[3]*cut_respa[3]) { rsw = (r - cut_respa[2])/(cut_respa[3] - cut_respa[2]); ftable[i] += qqrd2e/r * rsw*rsw*(3.0 - 2.0*rsw); ctable[i] = qqrd2e/r * rsw*rsw*(3.0 - 2.0*rsw); @@ -930,10 +930,10 @@ } } } - minrsq = MIN(minrsq,rsq); + minrsq_lookup.f = MIN(minrsq_lookup.f,rsq_lookup.f); } - tabinnersq = minrsq; + tabinnersq = minrsq_lookup.f; int ntablem1 = ntable - 1; @@ -971,16 +971,16 @@ // if so, compute deltas between rsq and cut*cut double f_tmp,c_tmp,e_tmp,p_tmp,v_tmp; - itablemin = *int_minrsq & ncoulmask; + itablemin = minrsq_lookup.i & ncoulmask; itablemin >>= ncoulshiftbits; int itablemax = itablemin - 1; if (itablemin == 0) itablemax = ntablem1; - *int_rsq = itablemax << ncoulshiftbits; - *int_rsq = *int_rsq | maskhi; + rsq_lookup.i = itablemax << ncoulshiftbits; + rsq_lookup.i |= maskhi; - if (rsq < cut_coulsq) { - rsq = cut_coulsq; - r = sqrtf(rsq); + if (rsq_lookup.f < cut_coulsq) { + rsq_lookup.f = cut_coulsq; + r = sqrtf(rsq_lookup.f); grij = g_ewald * r; expm2 = exp(-grij*grij); derfc = erfc(grij); @@ -995,8 +995,8 @@ e_tmp = qqrd2e/r * derfc; p_tmp = qqrd2e/r; v_tmp = qqrd2e/r * (derfc + EWALD_F*grij*expm2); - if (rsq > cut_respa[2]*cut_respa[2]) { - if (rsq < cut_respa[3]*cut_respa[3]) { + if (rsq_lookup.f > cut_respa[2]*cut_respa[2]) { + if (rsq_lookup.f < cut_respa[3]*cut_respa[3]) { rsw = (r - cut_respa[2])/(cut_respa[3] - cut_respa[2]); f_tmp += qqrd2e/r * rsw*rsw*(3.0 - 2.0*rsw); c_tmp = qqrd2e/r * rsw*rsw*(3.0 - 2.0*rsw); @@ -1007,7 +1007,7 @@ } } - drtable[itablemax] = 1.0/(rsq - rtable[itablemax]); + drtable[itablemax] = 1.0/(rsq_lookup.f - rtable[itablemax]); dftable[itablemax] = f_tmp - ftable[itablemax]; dctable[itablemax] = c_tmp - ctable[itablemax]; detable[itablemax] = e_tmp - etable[itablemax]; @@ -1146,11 +1146,11 @@ forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor; } else { - float rsq_single = rsq; - int *int_rsq = (int *) &rsq_single; - itable = *int_rsq & ncoulmask; + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + itable = rsq_lookup.i & ncoulmask; itable >>= ncoulshiftbits; - fraction = (rsq_single - rtable[itable]) * drtable[itable]; + fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable]; table = ftable[itable] + fraction*dftable[itable]; forcecoul = atom->q[i]*atom->q[j] * table; if (factor_coul < 1.0) { diff -Naur lammps-29Oct09/src/KSPACE/pair_lj_cut_coul_long.cpp lammps-30Oct09/src/KSPACE/pair_lj_cut_coul_long.cpp --- lammps-29Oct09/src/KSPACE/pair_lj_cut_coul_long.cpp 2009-08-18 12:02:10.000000000 -0600 +++ lammps-30Oct09/src/KSPACE/pair_lj_cut_coul_long.cpp 2009-10-29 16:03:02.000000000 -0600 @@ -85,8 +85,7 @@ double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; double grij,expm2,prefactor,t,erfc; int *ilist,*jlist,*numneigh,**firstneigh; - float rsq; - int *int_rsq = (int *) &rsq; + double rsq; evdwl = ecoul = 0.0; if (eflag || vflag) ev_setup(eflag,vflag); @@ -141,7 +140,7 @@ if (rsq < cut_coulsq) { if (!ncoultablebits || rsq <= tabinnersq) { - r = sqrtf(rsq); + r = sqrt(rsq); grij = g_ewald * r; expm2 = exp(-grij*grij); t = 1.0 / (1.0 + EWALD_P*grij); @@ -150,9 +149,11 @@ forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor; } else { - itable = *int_rsq & ncoulmask; + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + itable = rsq_lookup.i & ncoulmask; itable >>= ncoulshiftbits; - fraction = (rsq - rtable[itable]) * drtable[itable]; + fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable]; table = ftable[itable] + fraction*dftable[itable]; forcecoul = qtmp*q[j] * table; if (factor_coul < 1.0) { @@ -406,8 +407,7 @@ double grij,expm2,prefactor,t,erfc; double rsw; int *ilist,*jlist,*numneigh,**firstneigh; - float rsq; - int *int_rsq = (int *) &rsq; + double rsq; evdwl = ecoul = 0.0; if (eflag || vflag) ev_setup(eflag,vflag); @@ -469,7 +469,7 @@ if (rsq < cut_coulsq) { if (!ncoultablebits || rsq <= tabinnersq) { - r = sqrtf(rsq); + r = sqrt(rsq); grij = g_ewald * r; expm2 = exp(-grij*grij); t = 1.0 / (1.0 + EWALD_P*grij); @@ -479,9 +479,9 @@ if (rsq > cut_in_off_sq) { if (rsq < cut_in_on_sq) { rsw = (r - cut_in_off)/cut_in_diff; - forcecoul += prefactor*rsw*rsw*(3 - 2*rsw); + forcecoul += prefactor*rsw*rsw*(3.0 - 2.0*rsw); if (factor_coul < 1.0) - forcecoul -= (1.0-factor_coul)*prefactor*rsw*rsw*(3 - 2*rsw); + forcecoul -= (1.0-factor_coul)*prefactor*rsw*rsw*(3.0 - 2.0*rsw); } else { forcecoul += prefactor; if (factor_coul < 1.0) @@ -489,9 +489,11 @@ } } } else { - itable = *int_rsq & ncoulmask; + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + itable = rsq_lookup.i & ncoulmask; itable >>= ncoulshiftbits; - fraction = (rsq - rtable[itable]) * drtable[itable]; + fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable]; table = ftable[itable] + fraction*dftable[itable]; forcecoul = qtmp*q[j] * table; if (factor_coul < 1.0) { @@ -506,8 +508,8 @@ r6inv = r2inv*r2inv*r2inv; forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); if (rsq < cut_in_on_sq) { - rsw = (sqrtf(rsq) - cut_in_off)/cut_in_diff; - forcelj *= rsw*rsw*(3 - 2*rsw); + rsw = (sqrt(rsq) - cut_in_off)/cut_in_diff; + forcelj *= rsw*rsw*(3.0 - 2.0*rsw); } } else forcelj = 0.0; @@ -847,39 +849,37 @@ dptable = (double *) memory->smalloc(ntable*sizeof(double),"pair:dptable"); } - float rsq; - int *int_rsq = (int *) &rsq; - float minrsq; - int *int_minrsq = (int *) &minrsq; + union_int_float_t rsq_lookup; + union_int_float_t minrsq_lookup; int itablemin; - *int_minrsq = 0 << ncoulshiftbits; - *int_minrsq = *int_minrsq | maskhi; + minrsq_lookup.i = 0 << ncoulshiftbits; + minrsq_lookup.i |= maskhi; for (int i = 0; i < ntable; i++) { - *int_rsq = i << ncoulshiftbits; - *int_rsq = *int_rsq | masklo; - if (rsq < tabinnersq) { - *int_rsq = i << ncoulshiftbits; - *int_rsq = *int_rsq | maskhi; + rsq_lookup.i = i << ncoulshiftbits; + rsq_lookup.i |= masklo; + if (rsq_lookup.f < tabinnersq) { + rsq_lookup.i = i << ncoulshiftbits; + rsq_lookup.i |= maskhi; } - r = sqrtf(rsq); + r = sqrtf(rsq_lookup.f); grij = g_ewald * r; expm2 = exp(-grij*grij); derfc = erfc(grij); if (cut_respa == NULL) { - rtable[i] = rsq; + rtable[i] = rsq_lookup.f; ftable[i] = qqrd2e/r * (derfc + EWALD_F*grij*expm2); ctable[i] = qqrd2e/r; etable[i] = qqrd2e/r * derfc; } else { - rtable[i] = rsq; + rtable[i] = rsq_lookup.f; ftable[i] = qqrd2e/r * (derfc + EWALD_F*grij*expm2 - 1.0); ctable[i] = 0.0; etable[i] = qqrd2e/r * derfc; ptable[i] = qqrd2e/r; vtable[i] = qqrd2e/r * (derfc + EWALD_F*grij*expm2); - if (rsq > cut_respa[2]*cut_respa[2]) { - if (rsq < cut_respa[3]*cut_respa[3]) { + if (rsq_lookup.f > cut_respa[2]*cut_respa[2]) { + if (rsq_lookup.f < cut_respa[3]*cut_respa[3]) { rsw = (r - cut_respa[2])/(cut_respa[3] - cut_respa[2]); ftable[i] += qqrd2e/r * rsw*rsw*(3.0 - 2.0*rsw); ctable[i] = qqrd2e/r * rsw*rsw*(3.0 - 2.0*rsw); @@ -889,10 +889,10 @@ } } } - minrsq = MIN(minrsq,rsq); + minrsq_lookup.f = MIN(minrsq_lookup.f,rsq_lookup.f); } - tabinnersq = minrsq; + tabinnersq = minrsq_lookup.f; int ntablem1 = ntable - 1; @@ -928,18 +928,18 @@ // or ntablem1 if itablemin=0 // deltas at itablemax only needed if corresponding rsq < cut*cut // if so, compute deltas between rsq and cut*cut - + double f_tmp,c_tmp,e_tmp,p_tmp,v_tmp; - itablemin = *int_minrsq & ncoulmask; + itablemin = minrsq_lookup.i & ncoulmask; itablemin >>= ncoulshiftbits; int itablemax = itablemin - 1; if (itablemin == 0) itablemax = ntablem1; - *int_rsq = itablemax << ncoulshiftbits; - *int_rsq = *int_rsq | maskhi; + rsq_lookup.i = itablemax << ncoulshiftbits; + rsq_lookup.i |= maskhi; - if (rsq < cut_coulsq) { - rsq = cut_coulsq; - r = sqrtf(rsq); + if (rsq_lookup.f < cut_coulsq) { + rsq_lookup.f = cut_coulsq; + r = sqrtf(rsq_lookup.f); grij = g_ewald * r; expm2 = exp(-grij*grij); derfc = erfc(grij); @@ -954,8 +954,8 @@ e_tmp = qqrd2e/r * derfc; p_tmp = qqrd2e/r; v_tmp = qqrd2e/r * (derfc + EWALD_F*grij*expm2); - if (rsq > cut_respa[2]*cut_respa[2]) { - if (rsq < cut_respa[3]*cut_respa[3]) { + if (rsq_lookup.f > cut_respa[2]*cut_respa[2]) { + if (rsq_lookup.f < cut_respa[3]*cut_respa[3]) { rsw = (r - cut_respa[2])/(cut_respa[3] - cut_respa[2]); f_tmp += qqrd2e/r * rsw*rsw*(3.0 - 2.0*rsw); c_tmp = qqrd2e/r * rsw*rsw*(3.0 - 2.0*rsw); @@ -966,7 +966,7 @@ } } - drtable[itablemax] = 1.0/(rsq - rtable[itablemax]); + drtable[itablemax] = 1.0/(rsq_lookup.f - rtable[itablemax]); dftable[itablemax] = f_tmp - ftable[itablemax]; dctable[itablemax] = c_tmp - ctable[itablemax]; detable[itablemax] = e_tmp - etable[itablemax]; @@ -1099,11 +1099,11 @@ forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor; } else { - float rsq_single = rsq; - int *int_rsq = (int *) &rsq_single; - itable = *int_rsq & ncoulmask; + union_int_float_t rsq_lookup_single; + rsq_lookup_single.f = rsq; + itable = rsq_lookup_single.i & ncoulmask; itable >>= ncoulshiftbits; - fraction = (rsq_single - rtable[itable]) * drtable[itable]; + fraction = (rsq_lookup_single.f - rtable[itable]) * drtable[itable]; table = ftable[itable] + fraction*dftable[itable]; forcecoul = atom->q[i]*atom->q[j] * table; if (factor_coul < 1.0) { diff -Naur lammps-29Oct09/src/KSPACE/pair_lj_cut_coul_long_tip4p.cpp lammps-30Oct09/src/KSPACE/pair_lj_cut_coul_long_tip4p.cpp --- lammps-29Oct09/src/KSPACE/pair_lj_cut_coul_long_tip4p.cpp 2009-08-08 14:06:53.000000000 -0600 +++ lammps-30Oct09/src/KSPACE/pair_lj_cut_coul_long_tip4p.cpp 2009-10-29 16:03:02.000000000 -0600 @@ -77,8 +77,7 @@ double xiM[3],xjM[3],fO[3],fH[3],v[6]; double *x1,*x2; int *ilist,*jlist,*numneigh,**firstneigh; - float rsq; - int *int_rsq = (int *) &rsq; + double rsq; evdwl = ecoul = 0.0; if (eflag || vflag) ev_setup(eflag,vflag); @@ -174,9 +173,9 @@ // test current rsq against cutoff and compute Coulombic force if (rsq < cut_coulsq) { + r2inv = 1 / rsq; if (!ncoultablebits || rsq <= tabinnersq) { - r = sqrtf(rsq); - r2inv = 1 / rsq; + r = sqrt(rsq); grij = g_ewald * r; expm2 = exp(-grij*grij); t = 1.0 / (1.0 + EWALD_P*grij); @@ -187,10 +186,11 @@ forcecoul -= (1.0-factor_coul)*prefactor; } } else { - r2inv = 1 / rsq; - itable = *int_rsq & ncoulmask; + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + itable = rsq_lookup.i & ncoulmask; itable >>= ncoulshiftbits; - fraction = (rsq - rtable[itable]) * drtable[itable]; + fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable]; table = ftable[itable] + fraction*dftable[itable]; forcecoul = qtmp*q[j] * table; if (factor_coul < 1.0) { diff -Naur lammps-29Oct09/src/OPT/pair_lj_charmm_coul_long_opt.h lammps-30Oct09/src/OPT/pair_lj_charmm_coul_long_opt.h --- lammps-29Oct09/src/OPT/pair_lj_charmm_coul_long_opt.h 2008-01-31 13:45:48.000000000 -0700 +++ lammps-30Oct09/src/OPT/pair_lj_charmm_coul_long_opt.h 2009-10-29 16:03:02.000000000 -0600 @@ -65,9 +65,8 @@ double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; double grij,expm2,prefactor,t,erfc; double philj,switch1,switch2; - - float rsq; - int *int_rsq = (int *) &rsq; + + double rsq; double evdwl = 0.0; double ecoul = 0.0; @@ -142,7 +141,7 @@ forcecoul = 0.0; if (rsq < cut_coulsq) { if (!ncoultablebits || rsq <= tabinnersq) { - r = sqrtf(rsq); + r = sqrt(rsq); grij = g_ewald * r; expm2 = exp(-grij*grij); t = 1.0 / (1.0 + EWALD_P*grij); @@ -152,9 +151,11 @@ prefactor = qqrd2e * tmp_coef3/r; forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); } else { - itable = *int_rsq & ncoulmask; + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + itable = rsq_lookup.i & ncoulmask; itable >>= ncoulshiftbits; - fraction = (rsq - rtable[itable]) * drtable[itable]; + fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable]; table = ftable[itable] + fraction*dftable[itable]; forcecoul = tmp_coef3 * table; } @@ -228,7 +229,7 @@ forcecoul = 0.0; if (rsq < cut_coulsq) { if (!ncoultablebits || rsq <= tabinnersq) { - r = sqrtf(rsq); + r = sqrt(rsq); grij = g_ewald * r; expm2 = exp(-grij*grij); t = 1.0 / (1.0 + EWALD_P*grij); @@ -241,9 +242,11 @@ forcecoul -= (1.0-factor_coul)*prefactor; } } else { - itable = *int_rsq & ncoulmask; + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + itable = rsq_lookup.i & ncoulmask; itable >>= ncoulshiftbits; - fraction = (rsq - rtable[itable]) * drtable[itable]; + fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable]; table = ftable[itable] + fraction*dftable[itable]; forcecoul = tmp_coef3 * table; if (factor_coul < 1.0) { diff -Naur lammps-29Oct09/src/USER-CG-CMM/pair_cg_cmm_coul_long.cpp lammps-30Oct09/src/USER-CG-CMM/pair_cg_cmm_coul_long.cpp --- lammps-29Oct09/src/USER-CG-CMM/pair_cg_cmm_coul_long.cpp 2009-08-11 07:52:10.000000000 -0600 +++ lammps-30Oct09/src/USER-CG-CMM/pair_cg_cmm_coul_long.cpp 2009-10-29 16:03:02.000000000 -0600 @@ -163,38 +163,36 @@ dptable = (double *) memory->smalloc(ntable*sizeof(double),"pair:dptable"); } - float rsq; - int *int_rsq = (int *) &rsq; - float minrsq; - int *int_minrsq = (int *) &minrsq; + union_int_float_t rsq_lookup; + union_int_float_t minrsq_lookup; int itablemin; - *int_minrsq = 0 << ncoulshiftbits; - *int_minrsq = *int_minrsq | maskhi; + minrsq_lookup.i = 0 << ncoulshiftbits; + minrsq_lookup.i |= maskhi; for (int i = 0; i < ntable; i++) { - *int_rsq = i << ncoulshiftbits; - *int_rsq = *int_rsq | masklo; - if (rsq < tabinnersq) { - *int_rsq = i << ncoulshiftbits; - *int_rsq = *int_rsq | maskhi; + rsq_lookup.i = i << ncoulshiftbits; + rsq_lookup.i |= masklo; + if (rsq_lookup.f < tabinnersq) { + rsq_lookup.i = i << ncoulshiftbits; + rsq_lookup.i |= maskhi; } - r = sqrtf(rsq); + r = sqrtf(rsq_lookup.f); grij = g_ewald * r; expm2 = exp(-grij*grij); derfc = erfc(grij); if (cut_respa == NULL) { - rtable[i] = rsq; + rtable[i] = rsq_lookup.f; ftable[i] = qqrd2e/r * (derfc + EWALD_F*grij*expm2); ctable[i] = qqrd2e/r; etable[i] = qqrd2e/r * derfc; } else { - rtable[i] = rsq; + rtable[i] = rsq_lookup.f; ftable[i] = qqrd2e/r * (derfc + EWALD_F*grij*expm2 - 1.0); ctable[i] = 0.0; etable[i] = qqrd2e/r * derfc; ptable[i] = qqrd2e/r; vtable[i] = qqrd2e/r * (derfc + EWALD_F*grij*expm2); - if (rsq > cut_respa[2]*cut_respa[2]) { - if (rsq < cut_respa[3]*cut_respa[3]) { + if (rsq_lookup.f > cut_respa[2]*cut_respa[2]) { + if (rsq_lookup.f < cut_respa[3]*cut_respa[3]) { rsw = (r - cut_respa[2])/(cut_respa[3] - cut_respa[2]); ftable[i] += qqrd2e/r * rsw*rsw*(3.0 - 2.0*rsw); ctable[i] = qqrd2e/r * rsw*rsw*(3.0 - 2.0*rsw); @@ -204,9 +202,9 @@ } } } - minrsq = MIN(minrsq,rsq); + minrsq_lookup.f = MIN(minrsq_lookup.f,rsq_lookup.f); } - tabinnersq = minrsq; + tabinnersq = minrsq_lookup.f; int ntablem1 = ntable - 1; @@ -244,15 +242,15 @@ // if so, compute deltas between rsq and cut*cut double f_tmp,c_tmp,e_tmp,p_tmp,v_tmp; - itablemin = *int_minrsq & ncoulmask; + itablemin = minrsq_lookup.i & ncoulmask; itablemin >>= ncoulshiftbits; int itablemax = itablemin - 1; if (itablemin == 0) itablemax = ntablem1; - *int_rsq = itablemax << ncoulshiftbits; - *int_rsq = *int_rsq | maskhi; - if (rsq < cut_coulsq_global) { - rsq = cut_coulsq_global; - r = sqrtf(rsq); + rsq_lookup.i = itablemax << ncoulshiftbits; + rsq_lookup.i |= maskhi; + if (rsq_lookup.f < cut_coulsq_global) { + rsq_lookup.f = cut_coulsq_global; + r = sqrtf(rsq_lookup.f); grij = g_ewald * r; expm2 = exp(-grij*grij); derfc = erfc(grij); @@ -267,8 +265,8 @@ e_tmp = qqrd2e/r * derfc; p_tmp = qqrd2e/r; v_tmp = qqrd2e/r * (derfc + EWALD_F*grij*expm2); - if (rsq > cut_respa[2]*cut_respa[2]) { - if (rsq < cut_respa[3]*cut_respa[3]) { + if (rsq_lookup.f > cut_respa[2]*cut_respa[2]) { + if (rsq_lookup.f < cut_respa[3]*cut_respa[3]) { rsw = (r - cut_respa[2])/(cut_respa[3] - cut_respa[2]); f_tmp += qqrd2e/r * rsw*rsw*(3.0 - 2.0*rsw); c_tmp = qqrd2e/r * rsw*rsw*(3.0 - 2.0*rsw); @@ -279,7 +277,7 @@ } } - drtable[itablemax] = 1.0/(rsq - rtable[itablemax]); + drtable[itablemax] = 1.0/(rsq_lookup.f - rtable[itablemax]); dftable[itablemax] = f_tmp - ftable[itablemax]; dctable[itablemax] = c_tmp - ctable[itablemax]; detable[itablemax] = e_tmp - etable[itablemax]; diff -Naur lammps-29Oct09/src/USER-CG-CMM/pair_cmm_common.h lammps-30Oct09/src/USER-CG-CMM/pair_cmm_common.h --- lammps-29Oct09/src/USER-CG-CMM/pair_cmm_common.h 2008-05-15 17:03:19.000000000 -0600 +++ lammps-30Oct09/src/USER-CG-CMM/pair_cmm_common.h 2009-10-29 16:03:02.000000000 -0600 @@ -257,9 +257,8 @@ if (COUL_TYPE == CG_COUL_LONG) { if (rsq < cut_coulsq_global) { - const float rsqf = rsq; if (!ncoultablebits || rsq <= tabinnersq) { - const float r = sqrtf(rsq); + const double r = sqrt(rsq); const double grij = g_ewald * r; const double expm2 = exp(-grij*grij); @@ -273,10 +272,11 @@ if (EFLAG) ecoul -= (1.0-factor_coul)*prefactor; } } else { - const int *int_rsq = (int *) &rsqf; - int itable = *int_rsq & ncoulmask; + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + int itable = rsq_lookup.i & ncoulmask; itable >>= ncoulshiftbits; - const double fraction = (rsq - rtable[itable]) * drtable[itable]; + const double fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable]; const double table = ftable[itable] + fraction*dftable[itable]; forcecoul = qtmp*q[j] * table; if (EFLAG) { @@ -654,9 +654,8 @@ if (COUL_TYPE == CG_COUL_LONG) { if (rsq < cut_coulsq_global) { - const float rsqf = rsq; if (!ncoultablebits || rsq <= tabinnersq) { - const float r = sqrtf(rsq); + const double r = sqrt(rsq); const double grij = g_ewald * r; const double expm2 = exp(-grij*grij); @@ -670,10 +669,11 @@ if (EFLAG) ecoul -= (1.0-factor_coul)*prefactor; } } else { - const int *int_rsq = (int *) &rsqf; - int itable = *int_rsq & ncoulmask; + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + int itable = rsq_lookup.i & ncoulmask; itable >>= ncoulshiftbits; - const double fraction = (rsq - rtable[itable]) * drtable[itable]; + const double fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable]; const double table = ftable[itable] + fraction*dftable[itable]; forcecoul = qtmp*q[j] * table; if (EFLAG) { diff -Naur lammps-29Oct09/src/USER-EWALDN/pair_buck_coul.cpp lammps-30Oct09/src/USER-EWALDN/pair_buck_coul.cpp --- lammps-29Oct09/src/USER-EWALDN/pair_buck_coul.cpp 2009-08-08 14:06:53.000000000 -0600 +++ lammps-30Oct09/src/USER-EWALDN/pair_buck_coul.cpp 2009-10-29 16:03:02.000000000 -0600 @@ -224,7 +224,6 @@ void PairBuckCoul::init_style() { - int i,j; // require an atom style with charge defined @@ -508,17 +507,18 @@ } } // table real space else { - register float t = rsq; - register int k = (((int *) &t)[0] & ncoulmask)>>ncoulshiftbits; + register union_int_float_t t; + t.f = rsq; + register const int k = (t.i & ncoulmask) >> ncoulshiftbits; register double f = (rsq-rtable[k])*drtable[k], qiqj = qi*q[j]; if (ni < 0) { force_coul = qiqj*(ftable[k]+f*dftable[k]); if (eflag) ecoul = qiqj*(etable[k]+f*detable[k]); } else { // special case - t = (1.0-special_coul[ni])*(ctable[k]+f*dctable[k]); - force_coul = qiqj*(ftable[k]+f*dftable[k]-t); - if (eflag) ecoul = qiqj*(etable[k]+f*detable[k]-t); + t.f = (1.0-special_coul[ni])*(ctable[k]+f*dctable[k]); + force_coul = qiqj*(ftable[k]+f*dftable[k]-t.f); + if (eflag) ecoul = qiqj*(etable[k]+f*detable[k]-t.f); } } } @@ -848,17 +848,18 @@ if (respa_flag) respa_coul = ni<0 ? // correct for respa frespa*qri*q[j]/r : frespa*qri*q[j]/r*special_coul[ni]; - register float t = rsq; - register int k = (((int *) &t)[0] & ncoulmask)>>ncoulshiftbits; + register union_int_float_t t; + t.f = rsq; + register const int k = (t.i & ncoulmask) >> ncoulshiftbits; register double f = (rsq-rtable[k])*drtable[k], qiqj = qi*q[j]; if (ni < 0) { force_coul = qiqj*(ftable[k]+f*dftable[k]); if (eflag) ecoul = qiqj*(etable[k]+f*detable[k]); } else { // correct for special - t = (1.0-special_coul[ni])*(ctable[k]+f*dctable[k]); - force_coul = qiqj*(ftable[k]+f*dftable[k]-t); - if (eflag) ecoul = qiqj*(etable[k]+f*detable[k]-t); + t.f = (1.0-special_coul[ni])*(ctable[k]+f*dctable[k]); + force_coul = qiqj*(ftable[k]+f*dftable[k]-t.f); + if (eflag) ecoul = qiqj*(etable[k]+f*detable[k]-t.f); } } } @@ -964,39 +965,37 @@ dptable = (double *) memory->smalloc(ntable*sizeof(double),"pair:dptable"); } - float rsq; - int *int_rsq = (int *) &rsq; - float minrsq; - int *int_minrsq = (int *) &minrsq; + union_int_float_t rsq_lookup; + union_int_float_t minrsq_lookup; int itablemin; - *int_minrsq = 0 << ncoulshiftbits; - *int_minrsq = *int_minrsq | maskhi; + minrsq_lookup.i = 0 << ncoulshiftbits; + minrsq_lookup.i |= maskhi; for (int i = 0; i < ntable; i++) { - *int_rsq = i << ncoulshiftbits; - *int_rsq = *int_rsq | masklo; - if (rsq < tabinnersq) { - *int_rsq = i << ncoulshiftbits; - *int_rsq = *int_rsq | maskhi; + rsq_lookup.i = i << ncoulshiftbits; + rsq_lookup.i |= masklo; + if (rsq_lookup.f < tabinnersq) { + rsq_lookup.i = i << ncoulshiftbits; + rsq_lookup.i |= maskhi; } - r = sqrt(rsq); + r = sqrt(rsq_lookup.f); grij = g_ewald * r; expm2 = exp(-grij*grij); derfc = erfc(grij); if (cut_respa == NULL) { - rtable[i] = rsq; + rtable[i] = rsq_lookup.f; ftable[i] = qqrd2e/r * (derfc + EWALD_F*grij*expm2); ctable[i] = qqrd2e/r; etable[i] = qqrd2e/r * derfc; } else { - rtable[i] = rsq; + rtable[i] = rsq_lookup.f; ftable[i] = qqrd2e/r * (derfc + EWALD_F*grij*expm2 - 1.0); ctable[i] = 0.0; etable[i] = qqrd2e/r * derfc; ptable[i] = qqrd2e/r; vtable[i] = qqrd2e/r * (derfc + EWALD_F*grij*expm2); - if (rsq > cut_respa[2]*cut_respa[2]) { - if (rsq < cut_respa[3]*cut_respa[3]) { + if (rsq_lookup.f > cut_respa[2]*cut_respa[2]) { + if (rsq_lookup.f < cut_respa[3]*cut_respa[3]) { rsw = (r - cut_respa[2])/(cut_respa[3] - cut_respa[2]); ftable[i] += qqrd2e/r * rsw*rsw*(3.0 - 2.0*rsw); ctable[i] = qqrd2e/r * rsw*rsw*(3.0 - 2.0*rsw); @@ -1006,10 +1005,10 @@ } } } - minrsq = MIN(minrsq,rsq); + minrsq_lookup.f = MIN(minrsq_lookup.f,rsq_lookup.f); } - tabinnersq = minrsq; + tabinnersq = minrsq_lookup.f; int ntablem1 = ntable - 1; @@ -1047,16 +1046,16 @@ // if so, compute deltas between rsq and cut*cut double f_tmp,c_tmp,e_tmp,p_tmp,v_tmp; - itablemin = *int_minrsq & ncoulmask; + itablemin = minrsq_lookup.i & ncoulmask; itablemin >>= ncoulshiftbits; int itablemax = itablemin - 1; if (itablemin == 0) itablemax = ntablem1; - *int_rsq = itablemax << ncoulshiftbits; - *int_rsq = *int_rsq | maskhi; + rsq_lookup.i = itablemax << ncoulshiftbits; + rsq_lookup.i |= maskhi; - if (rsq < cut_coulsq) { - rsq = cut_coulsq; - r = sqrt(rsq); + if (rsq_lookup.f < cut_coulsq) { + rsq_lookup.f = cut_coulsq; + r = sqrtf(rsq_lookup.f); grij = g_ewald * r; expm2 = exp(-grij*grij); derfc = erfc(grij); @@ -1071,8 +1070,8 @@ e_tmp = qqrd2e/r * derfc; p_tmp = qqrd2e/r; v_tmp = qqrd2e/r * (derfc + EWALD_F*grij*expm2); - if (rsq > cut_respa[2]*cut_respa[2]) { - if (rsq < cut_respa[3]*cut_respa[3]) { + if (rsq_lookup.f > cut_respa[2]*cut_respa[2]) { + if (rsq_lookup.f < cut_respa[3]*cut_respa[3]) { rsw = (r - cut_respa[2])/(cut_respa[3] - cut_respa[2]); f_tmp += qqrd2e/r * rsw*rsw*(3.0 - 2.0*rsw); c_tmp = qqrd2e/r * rsw*rsw*(3.0 - 2.0*rsw); @@ -1083,7 +1082,7 @@ } } - drtable[itablemax] = 1.0/(rsq - rtable[itablemax]); + drtable[itablemax] = 1.0/(rsq_lookup.f - rtable[itablemax]); dftable[itablemax] = f_tmp - ftable[itablemax]; dctable[itablemax] = c_tmp - ctable[itablemax]; detable[itablemax] = e_tmp - etable[itablemax]; @@ -1136,12 +1135,13 @@ eng += t-f; } else { // table real space - register float t = rsq; - register int k = (((int *) &t)[0] & ncoulmask)>>ncoulshiftbits; + register union_int_float_t t; + t.f = rsq; + register const int k = (t.i & ncoulmask) >> ncoulshiftbits; register double f = (rsq-rtable[k])*drtable[k], qiqj = q[i]*q[j]; - t = (1.0-factor_coul)*(ctable[k]+f*dctable[k]); - force_coul = qiqj*(ftable[k]+f*dftable[k]-t); - eng += qiqj*(etable[k]+f*detable[k]-t); + t.f = (1.0-factor_coul)*(ctable[k]+f*dctable[k]); + force_coul = qiqj*(ftable[k]+f*dftable[k]-t.f); + eng += qiqj*(etable[k]+f*detable[k]-t.f); } } else force_coul = 0.0; diff -Naur lammps-29Oct09/src/USER-EWALDN/pair_lj_coul.cpp lammps-30Oct09/src/USER-EWALDN/pair_lj_coul.cpp --- lammps-29Oct09/src/USER-EWALDN/pair_lj_coul.cpp 2009-08-08 14:06:53.000000000 -0600 +++ lammps-30Oct09/src/USER-EWALDN/pair_lj_coul.cpp 2009-10-29 16:03:02.000000000 -0600 @@ -220,7 +220,7 @@ { char *style1[] = {"ewald", "ewald/n", "pppm", NULL}; char *style6[] = {"ewald/n", NULL}; - int i,j; + int i; // require an atom style with charge defined @@ -508,17 +508,18 @@ } } // table real space else { - register float t = rsq; - register int k = (((int *) &t)[0] & ncoulmask)>>ncoulshiftbits; + register union_int_float_t t; + t.f = rsq; + register const int k = (t.i & ncoulmask)>>ncoulshiftbits; register double f = (rsq-rtable[k])*drtable[k], qiqj = qi*q[j]; if (ni < 0) { force_coul = qiqj*(ftable[k]+f*dftable[k]); if (eflag) ecoul = qiqj*(etable[k]+f*detable[k]); } else { // special case - t = (1.0-special_coul[ni])*(ctable[k]+f*dctable[k]); - force_coul = qiqj*(ftable[k]+f*dftable[k]-t); - if (eflag) ecoul = qiqj*(etable[k]+f*detable[k]-t); + t.f = (1.0-special_coul[ni])*(ctable[k]+f*dctable[k]); + force_coul = qiqj*(ftable[k]+f*dftable[k]-t.f); + if (eflag) ecoul = qiqj*(etable[k]+f*detable[k]-t.f); } } } @@ -841,17 +842,18 @@ if (respa_flag) respa_coul = ni<0 ? // correct for respa frespa*qri*q[j]/sqrt(rsq) : frespa*qri*q[j]/sqrt(rsq)*special_coul[ni]; - register float t = rsq; - register int k = (((int *) &t)[0] & ncoulmask)>>ncoulshiftbits; + register union_int_float_t t; + t.f = rsq; + register const int k = (t.i & ncoulmask) >> ncoulshiftbits; register double f = (rsq-rtable[k])*drtable[k], qiqj = qi*q[j]; if (ni < 0) { force_coul = qiqj*(ftable[k]+f*dftable[k]); if (eflag) ecoul = qiqj*(etable[k]+f*detable[k]); } else { // correct for special - t = (1.0-special_coul[ni])*(ctable[k]+f*dctable[k]); - force_coul = qiqj*(ftable[k]+f*dftable[k]-t); - if (eflag) ecoul = qiqj*(etable[k]+f*detable[k]-t); + t.f = (1.0-special_coul[ni])*(ctable[k]+f*dctable[k]); + force_coul = qiqj*(ftable[k]+f*dftable[k]-t.f); + if (eflag) ecoul = qiqj*(etable[k]+f*detable[k]-t.f); } } } @@ -955,39 +957,37 @@ dptable = (double *) memory->smalloc(ntable*sizeof(double),"pair:dptable"); } - float rsq; - int *int_rsq = (int *) &rsq; - float minrsq; - int *int_minrsq = (int *) &minrsq; + union_int_float_t rsq_lookup; + union_int_float_t minrsq_lookup; int itablemin; - *int_minrsq = 0 << ncoulshiftbits; - *int_minrsq = *int_minrsq | maskhi; + minrsq_lookup.i = 0 << ncoulshiftbits; + minrsq_lookup.i |= maskhi; for (int i = 0; i < ntable; i++) { - *int_rsq = i << ncoulshiftbits; - *int_rsq = *int_rsq | masklo; - if (rsq < tabinnersq) { - *int_rsq = i << ncoulshiftbits; - *int_rsq = *int_rsq | maskhi; + rsq_lookup.i = i << ncoulshiftbits; + rsq_lookup.i |= masklo; + if (rsq_lookup.f < tabinnersq) { + rsq_lookup.i = i << ncoulshiftbits; + rsq_lookup.i |= maskhi; } - r = sqrt(rsq); + r = sqrtf(rsq_lookup.f); grij = g_ewald * r; expm2 = exp(-grij*grij); derfc = erfc(grij); if (cut_respa == NULL) { - rtable[i] = rsq; + rtable[i] = rsq_lookup.f; ftable[i] = qqrd2e/r * (derfc + EWALD_F*grij*expm2); ctable[i] = qqrd2e/r; etable[i] = qqrd2e/r * derfc; } else { - rtable[i] = rsq; + rtable[i] = rsq_lookup.f; ftable[i] = qqrd2e/r * (derfc + EWALD_F*grij*expm2 - 1.0); ctable[i] = 0.0; etable[i] = qqrd2e/r * derfc; ptable[i] = qqrd2e/r; vtable[i] = qqrd2e/r * (derfc + EWALD_F*grij*expm2); - if (rsq > cut_respa[2]*cut_respa[2]) { - if (rsq < cut_respa[3]*cut_respa[3]) { + if (rsq_lookup.f > cut_respa[2]*cut_respa[2]) { + if (rsq_lookup.f < cut_respa[3]*cut_respa[3]) { rsw = (r - cut_respa[2])/(cut_respa[3] - cut_respa[2]); ftable[i] += qqrd2e/r * rsw*rsw*(3.0 - 2.0*rsw); ctable[i] = qqrd2e/r * rsw*rsw*(3.0 - 2.0*rsw); @@ -997,10 +997,10 @@ } } } - minrsq = MIN(minrsq,rsq); + minrsq_lookup.f = MIN(minrsq_lookup.f,rsq_lookup.f); } - tabinnersq = minrsq; + tabinnersq = minrsq_lookup.f; int ntablem1 = ntable - 1; @@ -1038,16 +1038,16 @@ // if so, compute deltas between rsq and cut*cut double f_tmp,c_tmp,e_tmp,p_tmp,v_tmp; - itablemin = *int_minrsq & ncoulmask; + itablemin = minrsq_lookup.i & ncoulmask; itablemin >>= ncoulshiftbits; int itablemax = itablemin - 1; if (itablemin == 0) itablemax = ntablem1; - *int_rsq = itablemax << ncoulshiftbits; - *int_rsq = *int_rsq | maskhi; + rsq_lookup.i = itablemax << ncoulshiftbits; + rsq_lookup.i |= maskhi; - if (rsq < cut_coulsq) { - rsq = cut_coulsq; - r = sqrt(rsq); + if (rsq_lookup.f < cut_coulsq) { + rsq_lookup.f = cut_coulsq; + r = sqrtf(rsq_lookup.f); grij = g_ewald * r; expm2 = exp(-grij*grij); derfc = erfc(grij); @@ -1062,8 +1062,8 @@ e_tmp = qqrd2e/r * derfc; p_tmp = qqrd2e/r; v_tmp = qqrd2e/r * (derfc + EWALD_F*grij*expm2); - if (rsq > cut_respa[2]*cut_respa[2]) { - if (rsq < cut_respa[3]*cut_respa[3]) { + if (rsq_lookup.f > cut_respa[2]*cut_respa[2]) { + if (rsq_lookup.f < cut_respa[3]*cut_respa[3]) { rsw = (r - cut_respa[2])/(cut_respa[3] - cut_respa[2]); f_tmp += qqrd2e/r * rsw*rsw*(3.0 - 2.0*rsw); c_tmp = qqrd2e/r * rsw*rsw*(3.0 - 2.0*rsw); @@ -1074,7 +1074,7 @@ } } - drtable[itablemax] = 1.0/(rsq - rtable[itablemax]); + drtable[itablemax] = 1.0/(rsq_lookup.f - rtable[itablemax]); dftable[itablemax] = f_tmp - ftable[itablemax]; dctable[itablemax] = c_tmp - ctable[itablemax]; detable[itablemax] = e_tmp - etable[itablemax]; @@ -1126,12 +1126,13 @@ eng += t-r; } else { // table real space - register float t = rsq; - register int k = (((int *) &t)[0] & ncoulmask)>>ncoulshiftbits; + register union_int_float_t t; + t.f = rsq; + register const int k = (t.i & ncoulmask) >> ncoulshiftbits; register double f = (rsq-rtable[k])*drtable[k], qiqj = q[i]*q[j]; - t = (1.0-factor_coul)*(ctable[k]+f*dctable[k]); - force_coul = qiqj*(ftable[k]+f*dftable[k]-t); - eng += qiqj*(etable[k]+f*detable[k]-t); + t.f = (1.0-factor_coul)*(ctable[k]+f*dctable[k]); + force_coul = qiqj*(ftable[k]+f*dftable[k]-t.f); + eng += qiqj*(etable[k]+f*detable[k]-t.f); } } else force_coul = 0.0; diff -Naur lammps-29Oct09/src/pair.cpp lammps-30Oct09/src/pair.cpp --- lammps-29Oct09/src/pair.cpp 2009-06-08 12:29:00.000000000 -0600 +++ lammps-30Oct09/src/pair.cpp 2009-10-29 16:03:02.000000000 -0600 @@ -899,8 +899,7 @@ } double r,e,f,rsq; - float rsq_float; - int *int_rsq = (int *) &rsq_float; + union_int_float_t rsq_lookup; for (int i = 0; i < n; i++) { if (style == R) { @@ -910,13 +909,13 @@ rsq = inner*inner + (outer*outer - inner*inner) * i/(n-1); r = sqrt(rsq); } else if (style == BMP) { - *int_rsq = i << nshiftbits; - *int_rsq = *int_rsq | masklo; - if (rsq_float < inner*inner) { - *int_rsq = i << nshiftbits; - *int_rsq = *int_rsq | maskhi; + rsq_lookup.i = i << nshiftbits; + rsq_lookup.i |= masklo; + if (rsq_lookup.f < inner*inner) { + rsq_lookup.i = i << nshiftbits; + rsq_lookup.i |= maskhi; } - rsq = rsq_float; + rsq = rsq_lookup.f; r = sqrt(rsq); } @@ -981,12 +980,11 @@ for (int j = 0; j < ntablebits+nshiftbits; j++) nmask *= 2; nmask -= 1; - float rsq; - int *int_rsq = (int *) &rsq; - rsq = outer*outer; - maskhi = *int_rsq & ~(nmask); - rsq = inner*inner; - masklo = *int_rsq & ~(nmask); + union_int_float_t rsq_lookup; + rsq_lookup.f = outer*outer; + maskhi = rsq_lookup.i & ~(nmask); + rsq_lookup.f = inner*inner; + masklo = rsq_lookup.i & ~(nmask); } /* ---------------------------------------------------------------------- */ diff -Naur lammps-29Oct09/src/pair.h lammps-30Oct09/src/pair.h --- lammps-29Oct09/src/pair.h 2009-08-14 14:54:10.000000000 -0600 +++ lammps-30Oct09/src/pair.h 2009-10-29 16:03:02.000000000 -0600 @@ -106,6 +106,10 @@ int ncoultablebits; // size of Coulomb table double tabinner; // inner cutoff for Coulomb table + // custom data type for accessing Coulomb tables + + typedef union {int i; float f;} union_int_float_t; + double THIRD; int evflag; // energy,virial settings diff -Naur lammps-29Oct09/src/pair_table.cpp lammps-30Oct09/src/pair_table.cpp --- lammps-29Oct09/src/pair_table.cpp 2009-08-08 14:06:53.000000000 -0600 +++ lammps-30Oct09/src/pair_table.cpp 2009-10-29 16:03:02.000000000 -0600 @@ -74,8 +74,8 @@ double rsq,factor_lj,fraction,value,a,b; int *ilist,*jlist,*numneigh,**firstneigh; Table *tb; - float rsq_single; - int *int_rsq = (int *) &rsq_single; + + union_int_float_t rsq_lookup; evdwl = 0.0; if (eflag || vflag) ev_setup(eflag,vflag); @@ -148,10 +148,10 @@ tb->deltasq6; fpair = factor_lj * value; } else { - rsq_single = rsq; - itable = *int_rsq & tb->nmask; + rsq_lookup.f = rsq; + itable = rsq_lookup.i & tb->nmask; itable >>= tb->nshiftbits; - fraction = (rsq_single - tb->rsq[itable]) * tb->drsq[itable]; + fraction = (rsq_lookup.f - tb->rsq[itable]) * tb->drsq[itable]; value = tb->f[itable] + fraction*tb->df[itable]; fpair = factor_lj * value; } @@ -394,8 +394,7 @@ int itmp; double rtmp; - float rsq; - int *int_rsq = (int *) &rsq; + union_int_float_t rsq_lookup; fgets(line,MAXLINE,fp); for (int i = 0; i < tb->ninput; i++) { @@ -409,13 +408,13 @@ (tb->rhi*tb->rhi - tb->rlo*tb->rlo)*i/(tb->ninput-1); rtmp = sqrt(rtmp); } else if (tb->rflag == BMP) { - *int_rsq = i << nshiftbits; - *int_rsq = *int_rsq | masklo; - if (rsq < tb->rlo*tb->rlo) { - *int_rsq = i << nshiftbits; - *int_rsq = *int_rsq | maskhi; + rsq_lookup.i = i << nshiftbits; + rsq_lookup.i |= masklo; + if (rsq_lookup.f < tb->rlo*tb->rlo) { + rsq_lookup.i = i << nshiftbits; + rsq_lookup.i |= maskhi; } - rtmp = sqrtf(rsq); + rtmp = sqrtf(rsq_lookup.f); } tb->rfile[i] = rtmp; @@ -678,8 +677,7 @@ if (tabstyle == BITMAP) { double r; - float rsq; - int *int_rsq = (int *) &rsq; + union_int_float_t rsq_lookup; int masklo,maskhi; // linear lookup tables of length ntable = 2^n @@ -696,20 +694,19 @@ tb->df = (double *) memory->smalloc(ntable*sizeof(double),"pair:df"); tb->drsq = (double *) memory->smalloc(ntable*sizeof(double),"pair:drsq"); - float minrsq; - int *int_minrsq = (int *) &minrsq; - *int_minrsq = 0 << tb->nshiftbits; - *int_minrsq = *int_minrsq | maskhi; + union_int_float_t minrsq_lookup; + minrsq_lookup.i = 0 << tb->nshiftbits; + minrsq_lookup.i |= maskhi; for (int i = 0; i < ntable; i++) { - *int_rsq = i << tb->nshiftbits; - *int_rsq = *int_rsq | masklo; - if (rsq < tb->innersq) { - *int_rsq = i << tb->nshiftbits; - *int_rsq = *int_rsq | maskhi; + rsq_lookup.i = i << tb->nshiftbits; + rsq_lookup.i |= masklo; + if (rsq_lookup.f < tb->innersq) { + rsq_lookup.i = i << tb->nshiftbits; + rsq_lookup.i |= maskhi; } - r = sqrtf(rsq); - tb->rsq[i] = rsq; + r = sqrtf(rsq_lookup.f); + tb->rsq[i] = rsq_lookup.f; if (tb->match) { tb->e[i] = tb->efile[i]; tb->f[i] = tb->ffile[i]/r; @@ -717,10 +714,10 @@ tb->e[i] = splint(tb->rfile,tb->efile,tb->e2file,tb->ninput,r); tb->f[i] = splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,r)/r; } - minrsq = MIN(minrsq,rsq); + minrsq_lookup.f = MIN(minrsq_lookup.f,rsq_lookup.f); } - tb->innersq = minrsq; + tb->innersq = minrsq_lookup.f; for (int i = 0; i < ntablem1; i++) { tb->de[i] = tb->e[i+1] - tb->e[i]; @@ -746,27 +743,27 @@ // deltas at itablemax-1 as a good approximation double e_tmp,f_tmp; - int itablemin = *int_minrsq & tb->nmask; + int itablemin = minrsq_lookup.i & tb->nmask; itablemin >>= tb->nshiftbits; int itablemax = itablemin - 1; if (itablemin == 0) itablemax = ntablem1; int itablemaxm1 = itablemax - 1; if (itablemax == 0) itablemaxm1 = ntablem1; - *int_rsq = itablemax << tb->nshiftbits; - *int_rsq = *int_rsq | maskhi; - if (rsq < tb->cut*tb->cut) { + rsq_lookup.i = itablemax << tb->nshiftbits; + rsq_lookup.i |= maskhi; + if (rsq_lookup.f < tb->cut*tb->cut) { if (tb->match) { tb->de[itablemax] = tb->de[itablemaxm1]; tb->df[itablemax] = tb->df[itablemaxm1]; tb->drsq[itablemax] = tb->drsq[itablemaxm1]; } else { - rsq = tb->cut*tb->cut; - r = sqrtf(rsq); + rsq_lookup.f = tb->cut*tb->cut; + r = sqrtf(rsq_lookup.f); e_tmp = splint(tb->rfile,tb->efile,tb->e2file,tb->ninput,r); f_tmp = splint(tb->rfile,tb->ffile,tb->f2file,tb->ninput,r)/r; tb->de[itablemax] = e_tmp - tb->e[itablemax]; tb->df[itablemax] = f_tmp - tb->f[itablemax]; - tb->drsq[itablemax] = 1.0/(rsq - tb->rsq[itablemax]); + tb->drsq[itablemax] = 1.0/(rsq_lookup.f - tb->rsq[itablemax]); } } } @@ -938,11 +935,11 @@ tb->deltasq6; fforce = factor_lj * value; } else { - float rsq_single = rsq; - int *int_rsq = (int *) &rsq_single; - itable = *int_rsq & tb->nmask; + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + itable = rsq_lookup.i & tb->nmask; itable >>= tb->nshiftbits; - fraction = (rsq_single - tb->rsq[itable]) * tb->drsq[itable]; + fraction = (rsq_lookup.f - tb->rsq[itable]) * tb->drsq[itable]; value = tb->f[itable] + fraction*tb->df[itable]; fforce = factor_lj * value; }