diff -Naur lammps-16Nov09/doc/fix_imd.html lammps-17Nov09/doc/fix_imd.html --- lammps-16Nov09/doc/fix_imd.html 2009-11-06 09:15:26.000000000 -0700 +++ lammps-17Nov09/doc/fix_imd.html 2009-11-16 14:48:35.000000000 -0700 @@ -28,7 +28,10 @@ on = "unwrapped" coordinates using the image flags used fscale arg = factor factor = floating point number to scale IMD forces (default: 1.0) - trate arg = transmission rate of coordinate data sets (default: 1) + trate arg = transmission rate of coordinate data sets (default: 1) + nowait arg = on or off + off = LAMMPS waits to be connected to an IMD client before continuing (default) + on = LAMMPS listens for an IMD client, but continues with the run @@ -50,58 +53,66 @@ Illinois at Urbana-Champaign. We thank them for providing a software interface that allows codes like LAMMPS to hook to VMD.

-

Upon initialization of the fix, it will open a communication port -on the node with MPI task 0 and wait for an incoming connection. -As soon as an IMD client is connected, the simulation will continue -and the fix will send the current coordinates of the fix's group -to the IMD client at every trate MD step. When using r-RESPA, trate -applies to the steps of the outmost RESPA level. -During a run with an active IMD connection also the IMD client can -request to apply forces to selected atoms of the fix group. -

-

The port number selected must be an available network port number. -On many machines, port numbers < 1024 are reserved for accounts with -system manager privilege and specific applications. If multiple imd -fixes would be active at the same time, each needs to use a different +

Upon initialization of the fix, it will open a communication port on +the node with MPI task 0 and wait for an incoming connection. As soon +as an IMD client is connected, the simulation will continue and the +fix will send the current coordinates of the fix's group to the IMD +client at every trate MD step. When using r-RESPA, trate applies to +the steps of the outmost RESPA level. During a run with an active IMD +connection also the IMD client can request to apply forces to selected +atoms of the fix group. +

+

The port number selected must be an available network port number. On +many machines, port numbers < 1024 are reserved for accounts with +system manager privilege and specific applications. If multiple imd +fixes would be active at the same time, each needs to use a different port number.

-

The trate keyword allows to select how often the coordinate data -is sent to the IMD client. It can also be changed on request of -the IMD client through an IMD protocol message. -The unwrap keyword allows to send "unwrapped" coordinates to -the IMD client that undo the wrapping back of coordinates into -the principle unit cell, as done by default in LAMMPS. -The fscale keyword allows to apply a scaling factor to forces -transmitted by the IMD client. The IMD protocols stipulates that -forces are transferred in kcal/mol/angstrom under the assumption -that coordinates are given in angstrom. For LAMMPS runs with -different units or as a measure to tweak the forces generated -by the manipulation of the IMD client, this option allows to -make adjustments. -

-

To connect VMD to a waiting LAMMPS simulation on the same -machine with fix imd enabled, one needs to start VMD and load -a coordinate or topology file that matches the fix group. -When the VMD command prompts appears, one types the command line: +

The nowait keyword controls the behavior of the fix when no IMD +client is connected. With the default setting of off, LAMMPS will +wait until a connection is made before continuing with the +execution. Setting nowait to on will have the LAMMPS code be ready +to connect to a client, but continue with the simulation. This can for +example be used to monitor the progress of an ongoing calculation +without the need to be permanently connected or having to download a +trajectory file. +

+

The trate keyword allows to select how often the coordinate data is +sent to the IMD client. It can also be changed on request of the IMD +client through an IMD protocol message. The unwrap keyword allows +to send "unwrapped" coordinates to the IMD client that undo the +wrapping back of coordinates into the principle unit cell, as done by +default in LAMMPS. The fscale keyword allows to apply a scaling +factor to forces transmitted by the IMD client. The IMD protocols +stipulates that forces are transferred in kcal/mol/angstrom under the +assumption that coordinates are given in angstrom. For LAMMPS runs +with different units or as a measure to tweak the forces generated by +the manipulation of the IMD client, this option allows to make +adjustments. +

+

To connect VMD to a listening LAMMPS simulation on the same machine +with fix imd enabled, one needs to start VMD and load a coordinate or +topology file that matches the fix group. When the VMD command +prompts appears, one types the command line:

imd connect localhost 5678 
 

This assumes that fix imd was started with 5678 as a port number for the IMD protocol.

-

The steps to do interactive manipulation of a running simulation -in VMD are the following: +

The steps to do interactive manipulation of a running simulation in +VMD are the following:

-

In the Mouse menu of the VMD Main window, select "Mouse -> Force -> Atom". -You may alternately select "Residue", or "Fragment" to apply forces to -whole residues or fragments. Your mouse can now be used to apply forces -to your simulation. Click on an atom, residue, or fragment and drag to -apply a force. Click quickly without moving the mouse to turn the force -off. You can also use a variety of 3D position trackers to apply forces -to your simulation. Trackers with force-feedback such as the Sensable -PHANTOM allow you to feel the forces you are applying to your molecules, -as if they were real objects. See the VMD IMD Homepage for -more details. +

In the Mouse menu of the VMD Main window, select "Mouse -> Force -> +Atom". You may alternately select "Residue", or "Fragment" to apply +forces to whole residues or fragments. Your mouse can now be used to +apply forces to your simulation. Click on an atom, residue, or +fragment and drag to apply a force. Click quickly without moving the +mouse to turn the force off. You can also use a variety of 3D position +trackers to apply forces to your simulation. Trackers with +force-feedback such as the Sensable PHANTOM allow you to feel the +forces you are applying to your molecules, as if they were real +objects. See the VMD IMD Homepage for more details.

If IMD control messages are received, a line of text describing the message and its effect will be printed to the LAMMPS output screen, if @@ -133,8 +144,8 @@ ascending integer value; in VMD (and thus the IMD protocol) those will be assigned 0-based consecutive index numbers.

-

When using multiple active IMD connections at the same time, -each needs to use a different port number. +

When using multiple active IMD connections at the same time, each +needs to use a different port number.

Related commands: none

diff -Naur lammps-16Nov09/doc/fix_imd.txt lammps-17Nov09/doc/fix_imd.txt --- lammps-16Nov09/doc/fix_imd.txt 2009-11-06 09:15:26.000000000 -0700 +++ lammps-17Nov09/doc/fix_imd.txt 2009-11-16 14:48:35.000000000 -0700 @@ -21,7 +21,10 @@ on = "unwrapped" coordinates using the image flags used {fscale} arg = factor factor = floating point number to scale IMD forces (default: 1.0) - {trate} arg = transmission rate of coordinate data sets (default: 1) :pre + {trate} arg = transmission rate of coordinate data sets (default: 1) + {nowait} arg = {on} or {off} + off = LAMMPS waits to be connected to an IMD client before continuing (default) + on = LAMMPS listens for an IMD client, but continues with the run :pre :ule [Examples:] @@ -42,58 +45,66 @@ Illinois at Urbana-Champaign. We thank them for providing a software interface that allows codes like LAMMPS to hook to "VMD"_VMD. -Upon initialization of the fix, it will open a communication port -on the node with MPI task 0 and wait for an incoming connection. -As soon as an IMD client is connected, the simulation will continue -and the fix will send the current coordinates of the fix's group -to the IMD client at every trate MD step. When using r-RESPA, trate -applies to the steps of the outmost RESPA level. -During a run with an active IMD connection also the IMD client can -request to apply forces to selected atoms of the fix group. - -The port number selected must be an available network port number. -On many machines, port numbers < 1024 are reserved for accounts with -system manager privilege and specific applications. If multiple imd -fixes would be active at the same time, each needs to use a different +Upon initialization of the fix, it will open a communication port on +the node with MPI task 0 and wait for an incoming connection. As soon +as an IMD client is connected, the simulation will continue and the +fix will send the current coordinates of the fix's group to the IMD +client at every trate MD step. When using r-RESPA, trate applies to +the steps of the outmost RESPA level. During a run with an active IMD +connection also the IMD client can request to apply forces to selected +atoms of the fix group. + +The port number selected must be an available network port number. On +many machines, port numbers < 1024 are reserved for accounts with +system manager privilege and specific applications. If multiple imd +fixes would be active at the same time, each needs to use a different port number. -The {trate} keyword allows to select how often the coordinate data -is sent to the IMD client. It can also be changed on request of -the IMD client through an IMD protocol message. -The {unwrap} keyword allows to send "unwrapped" coordinates to -the IMD client that undo the wrapping back of coordinates into -the principle unit cell, as done by default in LAMMPS. -The {fscale} keyword allows to apply a scaling factor to forces -transmitted by the IMD client. The IMD protocols stipulates that -forces are transferred in kcal/mol/angstrom under the assumption -that coordinates are given in angstrom. For LAMMPS runs with -different units or as a measure to tweak the forces generated -by the manipulation of the IMD client, this option allows to -make adjustments. - -To connect VMD to a waiting LAMMPS simulation on the same -machine with fix imd enabled, one needs to start VMD and load -a coordinate or topology file that matches the fix group. -When the VMD command prompts appears, one types the command line: +The {nowait} keyword controls the behavior of the fix when no IMD +client is connected. With the default setting of {off}, LAMMPS will +wait until a connection is made before continuing with the +execution. Setting {nowait} to {on} will have the LAMMPS code be ready +to connect to a client, but continue with the simulation. This can for +example be used to monitor the progress of an ongoing calculation +without the need to be permanently connected or having to download a +trajectory file. + +The {trate} keyword allows to select how often the coordinate data is +sent to the IMD client. It can also be changed on request of the IMD +client through an IMD protocol message. The {unwrap} keyword allows +to send "unwrapped" coordinates to the IMD client that undo the +wrapping back of coordinates into the principle unit cell, as done by +default in LAMMPS. The {fscale} keyword allows to apply a scaling +factor to forces transmitted by the IMD client. The IMD protocols +stipulates that forces are transferred in kcal/mol/angstrom under the +assumption that coordinates are given in angstrom. For LAMMPS runs +with different units or as a measure to tweak the forces generated by +the manipulation of the IMD client, this option allows to make +adjustments. + +To connect VMD to a listening LAMMPS simulation on the same machine +with fix imd enabled, one needs to start VMD and load a coordinate or +topology file that matches the fix group. When the VMD command +prompts appears, one types the command line: imd connect localhost 5678 :pre This assumes that {fix imd} was started with 5678 as a port number for the IMD protocol. -The steps to do interactive manipulation of a running simulation -in VMD are the following: +The steps to do interactive manipulation of a running simulation in +VMD are the following: -In the Mouse menu of the VMD Main window, select "Mouse -> Force -> Atom". -You may alternately select "Residue", or "Fragment" to apply forces to -whole residues or fragments. Your mouse can now be used to apply forces -to your simulation. Click on an atom, residue, or fragment and drag to -apply a force. Click quickly without moving the mouse to turn the force -off. You can also use a variety of 3D position trackers to apply forces -to your simulation. Trackers with force-feedback such as the Sensable -PHANTOM allow you to feel the forces you are applying to your molecules, -as if they were real objects. See the "VMD IMD Homepage"_imdvmd for -more details. +In the Mouse menu of the VMD Main window, select "Mouse -> Force -> +Atom". You may alternately select "Residue", or "Fragment" to apply +forces to whole residues or fragments. Your mouse can now be used to +apply forces to your simulation. Click on an atom, residue, or +fragment and drag to apply a force. Click quickly without moving the +mouse to turn the force off. You can also use a variety of 3D position +trackers to apply forces to your simulation. Trackers with +force-feedback such as the Sensable PHANTOM allow you to feel the +forces you are applying to your molecules, as if they were real +objects. See the "VMD IMD Homepage"_imdvmd for more details. If IMD control messages are received, a line of text describing the message and its effect will be printed to the LAMMPS output screen, if @@ -124,8 +135,8 @@ ascending integer value; in VMD (and thus the IMD protocol) those will be assigned 0-based consecutive index numbers. -When using multiple active IMD connections at the same time, -each needs to use a different port number. +When using multiple active IMD connections at the same time, each +needs to use a different port number. [Related commands:] none diff -Naur lammps-16Nov09/lib/gpu/Makefile.nvidia lammps-17Nov09/lib/gpu/Makefile.nvidia --- lammps-16Nov09/lib/gpu/Makefile.nvidia 2009-08-13 10:58:35.000000000 -0600 +++ lammps-17Nov09/lib/gpu/Makefile.nvidia 2009-11-16 14:42:57.000000000 -0700 @@ -16,7 +16,7 @@ BIN_DIR = . OBJ_DIR = . AR = ar -CUDA_CPP = nvcc -I/usr/local/cuda/include -DUNIX -O3 -DDEBUG -Xptxas -v --use_fast_math +CUDA_CPP = nvcc -I/usr/local/cuda/include -DUNIX -O3 -Xptxas -v --use_fast_math CUDA_ARCH = -maxrregcount 128 #-arch=sm_13 CUDA_PREC = -D_SINGLE_SINGLE CUDA_LINK = -L/usr/local/cuda/lib64 -lcudart $(CUDA_LIB) diff -Naur lammps-16Nov09/lib/gpu/README lammps-17Nov09/lib/gpu/README --- lammps-16Nov09/lib/gpu/README 2009-08-11 13:00:24.000000000 -0600 +++ lammps-17Nov09/lib/gpu/README 2009-11-16 14:42:57.000000000 -0700 @@ -69,6 +69,9 @@ NOTE: Double precision is only supported on certain GPUS (with compute capability>=1.3). +NOTE: For Tesla and other graphics cards with compute capability>=1.3, + make sure that -arch=sm_13 is set on the CUDA_ARCH line. + NOTE: The gayberne/gpu pair style will only be installed if the ASPHERE package has been installed before installing the GPU package in LAMMPS. diff -Naur lammps-16Nov09/lib/gpu/gb_gpu.cu lammps-17Nov09/lib/gpu/gb_gpu.cu --- lammps-16Nov09/lib/gpu/gb_gpu.cu 2009-09-04 09:03:44.000000000 -0600 +++ lammps-17Nov09/lib/gpu/gb_gpu.cu 2009-11-16 14:42:57.000000000 -0700 @@ -203,17 +203,19 @@ // --------------------------------------------------------------------------- // Allocate memory on host and device and copy constants to device // --------------------------------------------------------------------------- -int * gb_gpu_init(int &ij_size, const int ntypes, const double gamma, - const double upsilon, const double mu, double **shape, - double **well, double **cutsq, double **sigma, - double **epsilon, double *host_lshape, int **form, - double **host_lj1, double **host_lj2, double **host_lj3, - double **host_lj4, double **offset, double *special_lj, - const int max_nbors, const int thread, const int gpu_id) { +bool gb_gpu_init(int &ij_size, const int ntypes, const double gamma, + const double upsilon, const double mu, double **shape, + double **well, double **cutsq, double **sigma, + double **epsilon, double *host_lshape, int **form, + double **host_lj1, double **host_lj2, double **host_lj3, + double **host_lj4, double **offset, double *special_lj, + const int max_nbors, const int thread, const int gpu_id) { assert(thread -void _gb_gpu_nbors(gbmtyp &gbm, const int num_ij, const bool eflag) { +void _gb_gpu_nbors(gbmtyp &gbm, const int *ij, const int num_ij, + const bool eflag) { gbm.nbor.time_nbor.add_to_total(); // CUDA_SAFE_CALL(cudaStreamSynchronize(gbm.pair_stream)); // Not if timed + memcpy(gbm.nbor.host_ij.begin(),ij,num_ij*sizeof(int)); gbm.nbor.time_nbor.start(); gbm.nbor.add(num_ij,gbm.pair_stream); gbm.nbor.time_nbor.stop(); } -void gb_gpu_nbors(const int num_ij, const bool eflag, const int thread) { - _gb_gpu_nbors(GBMF[thread],num_ij,eflag); +void gb_gpu_nbors(const int *ij, const int num_ij, const bool eflag, + const int thread) { + _gb_gpu_nbors(GBMF[thread],ij,num_ij,eflag); } // --------------------------------------------------------------------------- @@ -475,7 +480,7 @@ gbm.time_gayberne2.add_to_total(); gbm.time_pair.add_to_total(); } - // CUDA_SAFE_CALL(cudaStreamSynchronize(gbm.pair_stream)); // Not if timed + CUDA_SAFE_CALL(cudaStreamSynchronize(gbm.pair_stream)); evdw=gbm.atom.energy_virial(ilist,eflag_atom,vflag_atom,eatom,vatom,virial); gbm.atom.add_forces(ilist,f); diff -Naur lammps-16Nov09/lib/gpu/gb_gpu_memory.cu lammps-17Nov09/lib/gpu/gb_gpu_memory.cu --- lammps-16Nov09/lib/gpu/gb_gpu_memory.cu 2009-08-13 10:05:41.000000000 -0600 +++ lammps-17Nov09/lib/gpu/gb_gpu_memory.cu 2009-11-16 14:42:57.000000000 -0700 @@ -37,7 +37,7 @@ } template -int* GB_GPU_MemoryT::init(const int ij_size, const int ntypes, +bool GB_GPU_MemoryT::init(const int ij_size, const int ntypes, const double gamma, const double upsilon, const double mu, double **host_shape, double **host_well, double **host_cutsq, @@ -50,9 +50,11 @@ if (this->allocated) clear(); - LJ_GPU_MemoryT::init(ij_size,ntypes,host_cutsq,host_sigma,host_epsilon, - host_lj1, host_lj2, host_lj3, host_lj4, host_offset, - host_special_lj, max_nbors, me); + bool p=LJ_GPU_MemoryT::init(ij_size,ntypes,host_cutsq,host_sigma,host_epsilon, + host_lj1, host_lj2, host_lj3, host_lj4, + host_offset, host_special_lj, max_nbors, me); + if (!p) + return false; host_form=h_form; @@ -100,7 +102,7 @@ // Memory for ilist ordered by particle type host_olist.safe_alloc_rw(this->max_atoms); - return this->nbor.host_ij.begin(); + return true; } template diff -Naur lammps-16Nov09/lib/gpu/gb_gpu_memory.h lammps-17Nov09/lib/gpu/gb_gpu_memory.h --- lammps-16Nov09/lib/gpu/gb_gpu_memory.h 2009-08-13 10:05:41.000000000 -0600 +++ lammps-17Nov09/lib/gpu/gb_gpu_memory.h 2009-11-16 14:42:57.000000000 -0700 @@ -35,7 +35,7 @@ GB_GPU_Memory(); ~GB_GPU_Memory(); - int* init(const int ij_size, const int ntypes, const double gamma, + bool init(const int ij_size, const int ntypes, const double gamma, const double upsilon, const double mu, double **host_shape, double **host_well, double **host_cutsq, double **host_sigma, double **host_epsilon, double *host_lshape, int **h_form, diff -Naur lammps-16Nov09/lib/gpu/lj_gpu.cu lammps-17Nov09/lib/gpu/lj_gpu.cu --- lammps-16Nov09/lib/gpu/lj_gpu.cu 2009-08-13 10:05:41.000000000 -0600 +++ lammps-17Nov09/lib/gpu/lj_gpu.cu 2009-11-16 14:42:57.000000000 -0700 @@ -63,12 +63,13 @@ // --------------------------------------------------------------------------- // Allocate memory on host and device and copy constants to device // --------------------------------------------------------------------------- -int * lj_gpu_init(int &ij_size, const int ntypes, double **cutsq,double **sigma, - double **epsilon, double **host_lj1, double **host_lj2, - double **host_lj3, double **host_lj4, double **offset, - double *special_lj, const int max_nbors, const int gpu_id) { +bool lj_gpu_init(int &ij_size, const int ntypes, double **cutsq,double **sigma, + double **epsilon, double **host_lj1, double **host_lj2, + double **host_lj3, double **host_lj4, double **offset, + double *special_lj, const int max_nbors, const int gpu_id) { + LJMF.gpu.init(); if (LJMF.gpu.num_devices()==0) - return 0; + return false; ij_size=IJ_SIZE; return LJMF.init(ij_size, ntypes, cutsq, sigma, epsilon, host_lj1, host_lj2, @@ -142,17 +143,19 @@ // forces, and torques for those interactions // --------------------------------------------------------------------------- template -void _lj_gpu_nbors(LJMTyp &ljm, const int num_ij) { +void _lj_gpu_nbors(LJMTyp &ljm, const int *ij, const int num_ij) { ljm.nbor.time_nbor.add_to_total(); - // CUDA_SAFE_CALL(cudaStreamSynchronize(ljm.pair_stream)); // Not if timed + // CUDA_SAFE_CALL(cudaStreamSynchronize(ljm.pair_stream)); // Not if timed + + memcpy(ljm.nbor.host_ij.begin(),ij,num_ij*sizeof(int)); ljm.nbor.time_nbor.start(); ljm.nbor.add(num_ij,ljm.pair_stream); ljm.nbor.time_nbor.stop(); } -void lj_gpu_nbors(const int num_ij) { - _lj_gpu_nbors(LJMF,num_ij); +void lj_gpu_nbors(const int *ij, const int num_ij) { + _lj_gpu_nbors(LJMF,ij,num_ij); } // --------------------------------------------------------------------------- @@ -201,7 +204,7 @@ ljm.atom.time_atom.add_to_total(); ljm.nbor.time_nbor.add_to_total(); ljm.time_pair.add_to_total(); - // CUDA_SAFE_CALL(cudaStreamSynchronize(ljm.pair_stream)); // not if timed + CUDA_SAFE_CALL(cudaStreamSynchronize(ljm.pair_stream)); evdw=ljm.atom.energy_virial(ilist,eflag_atom,vflag_atom,eatom,vatom,virial); ljm.atom.add_forces(ilist,f); diff -Naur lammps-16Nov09/lib/gpu/lj_gpu_memory.cu lammps-17Nov09/lib/gpu/lj_gpu_memory.cu --- lammps-16Nov09/lib/gpu/lj_gpu_memory.cu 2009-08-13 10:05:41.000000000 -0600 +++ lammps-17Nov09/lib/gpu/lj_gpu_memory.cu 2009-11-16 14:42:57.000000000 -0700 @@ -39,7 +39,7 @@ } template -int* LJ_GPU_MemoryT::init(const int ij_size, const int ntypes, +bool LJ_GPU_MemoryT::init(const int ij_size, const int ntypes, double **host_cutsq, double **host_sigma, double **host_epsilon, double **host_lj1, double **host_lj2, double **host_lj3, @@ -50,10 +50,10 @@ clear(); if (me>=gpu.num_devices()) - return 0; + return false; gpu.set(me); if (gpu.revision()<1.0) - return 0; + return false; // Initialize timers for the selected GPU time_pair.init(); @@ -114,8 +114,7 @@ dev_error.zero(); allocated=true; - - return nbor.host_ij.begin(); + return true; } template diff -Naur lammps-16Nov09/lib/gpu/lj_gpu_memory.h lammps-17Nov09/lib/gpu/lj_gpu_memory.h --- lammps-16Nov09/lib/gpu/lj_gpu_memory.h 2009-08-13 10:05:41.000000000 -0600 +++ lammps-17Nov09/lib/gpu/lj_gpu_memory.h 2009-11-16 14:42:57.000000000 -0700 @@ -40,7 +40,7 @@ ~LJ_GPU_Memory() { clear(); } /// Allocate memory on host and device - int* init(const int ij_size, const int ntypes, double **host_cutsq, + bool init(const int ij_size, const int ntypes, double **host_cutsq, double **host_sigma, double **host_epsilon, double **host_lj1, double **host_lj2, double **host_lj3, double **host_lj4, double **host_offset, double *host_special_lj, diff -Naur lammps-16Nov09/lib/gpu/nvc_device.cu lammps-17Nov09/lib/gpu/nvc_device.cu --- lammps-16Nov09/lib/gpu/nvc_device.cu 2009-08-11 13:00:24.000000000 -0600 +++ lammps-17Nov09/lib/gpu/nvc_device.cu 2009-11-16 14:42:57.000000000 -0700 @@ -28,7 +28,9 @@ #include "nvc_device.h" // Grabs the properties for all devices -NVCDevice::NVCDevice() { +void NVCDevice::init() { + _properties.clear(); + CUDA_SAFE_CALL(cudaGetDeviceCount(&_num_devices)); for (int dev=0; dev<_num_devices; ++dev) { cudaDeviceProp deviceProp; diff -Naur lammps-16Nov09/lib/gpu/nvc_device.h lammps-17Nov09/lib/gpu/nvc_device.h --- lammps-16Nov09/lib/gpu/nvc_device.h 2009-08-11 13:00:24.000000000 -0600 +++ lammps-17Nov09/lib/gpu/nvc_device.h 2009-11-16 14:42:57.000000000 -0700 @@ -33,11 +33,16 @@ /// Class for looking at device properties /** \note Calls to change the device outside of the class results in incorrect * behavior - * \note There is no error checking for indexing past the number of devices **/ + * \note There is no error checking for indexing past the number of devices + * \note init() at least once before using any of the routines **/ class NVCDevice { public: /// Grabs the properties for all devices - NVCDevice(); + /** \note init() must be called following construction before any routines **/ + NVCDevice() {} + + /// Collect properties for every GPU on the node and set active GPU to ID 0 + void init(); /// Return the number of devices that support CUDA inline int num_devices() { return _properties.size(); } diff -Naur lammps-16Nov09/lib/gpu/nvc_get_devices.cu lammps-17Nov09/lib/gpu/nvc_get_devices.cu --- lammps-16Nov09/lib/gpu/nvc_get_devices.cu 2009-08-11 13:00:24.000000000 -0600 +++ lammps-17Nov09/lib/gpu/nvc_get_devices.cu 2009-11-16 14:42:57.000000000 -0700 @@ -25,6 +25,7 @@ int main(int argc, char** argv) { NVCDevice gpu; + gpu.init(); gpu.print_all(cout); return 0; } diff -Naur lammps-16Nov09/lib/gpu/nvc_macros.h lammps-17Nov09/lib/gpu/nvc_macros.h --- lammps-16Nov09/lib/gpu/nvc_macros.h 2009-08-11 13:00:24.000000000 -0600 +++ lammps-17Nov09/lib/gpu/nvc_macros.h 2009-11-16 14:42:57.000000000 -0700 @@ -20,7 +20,7 @@ template <> static __inline__ __device__ float cuda_zero() { return 0.0f; } -#ifdef DEBUG +#ifndef NO_DEBUG # define CU_SAFE_CALL_NO_SYNC( call ) do { \ CUresult err = call; \ diff -Naur lammps-16Nov09/lib/gpu/nvc_memory.h lammps-17Nov09/lib/gpu/nvc_memory.h --- lammps-16Nov09/lib/gpu/nvc_memory.h 2009-08-13 10:05:41.000000000 -0600 +++ lammps-17Nov09/lib/gpu/nvc_memory.h 2009-11-16 14:42:57.000000000 -0700 @@ -156,25 +156,26 @@ /// Asynchronous copy from device (numel is not bytes) inline void copy_from_device(const numtyp *device_p, size_t numel, cudaStream_t &stream) { - CUDA_SAFE_CALL(cudaMemcpyAsync(_array,device_p,numel*sizeof(numtyp), - cudaMemcpyDeviceToHost,stream)); + CUDA_SAFE_CALL_NO_SYNC(cudaMemcpyAsync(_array,device_p,numel*sizeof(numtyp), + cudaMemcpyDeviceToHost,stream)); } /// Asynchronous copy to device (numel is not bytes) inline void copy_to_device(numtyp *device_p, size_t numel, cudaStream_t &stream) { - CUDA_SAFE_CALL(cudaMemcpyAsync(device_p,_array,numel*sizeof(numtyp), - cudaMemcpyHostToDevice,stream)); + CUDA_SAFE_CALL_NO_SYNC(cudaMemcpyAsync(device_p,_array,numel*sizeof(numtyp), + cudaMemcpyHostToDevice,stream)); } /// Asynchronous copy to 2D matrix on device (numel is not bytes) inline void copy_to_2Ddevice(numtyp *device_p, const size_t dev_row_size, const size_t rows, const size_t cols, cudaStream_t &stream) { - CUDA_SAFE_CALL(cudaMemcpy2DAsync(device_p,dev_row_size*sizeof(numtyp), - _array,cols*sizeof(numtyp), - cols*sizeof(numtyp),rows, - cudaMemcpyHostToDevice,stream)); + CUDA_SAFE_CALL_NO_SYNC(cudaMemcpy2DAsync(device_p, + dev_row_size*sizeof(numtyp), + _array,cols*sizeof(numtyp), + cols*sizeof(numtyp),rows, + cudaMemcpyHostToDevice,stream)); } private: @@ -226,8 +227,8 @@ /// Asynchronous copy from host inline void copy_from_host(const numtyp *host_p, cudaStream_t &stream) - { CUDA_SAFE_CALL(cudaMemcpyAsync(_array,host_p,row_bytes(), - cudaMemcpyHostToDevice, stream)); } + { CUDA_SAFE_CALL_NO_SYNC(cudaMemcpyAsync(_array,host_p,row_bytes(), + cudaMemcpyHostToDevice, stream)); } /// Copy to host inline void copy_to_host(numtyp *host_p) @@ -328,17 +329,17 @@ /// Asynchronous copy from host (elements not bytes) inline void copy_from_host(const numtyp *host_p, const size_t numel, cudaStream_t &stream) - { CUDA_SAFE_CALL(cudaMemcpyAsync(_array,host_p,numel*sizeof(numtyp), - cudaMemcpyHostToDevice, stream)); } + { CUDA_SAFE_CALL_NO_SYNC(cudaMemcpyAsync(_array,host_p,numel*sizeof(numtyp), + cudaMemcpyHostToDevice, stream)); } /// Asynchronous Copy from Host /** \note Used when the number of columns/rows allocated on host smaller than * on device **/ inline void copy_2Dfrom_host(const numtyp *host_p, const size_t rows, const size_t cols, cudaStream_t &stream) { - CUDA_SAFE_CALL(cudaMemcpy2DAsync(_array, _pitch, host_p,cols*sizeof(numtyp), - cols*sizeof(numtyp), rows, - cudaMemcpyHostToDevice,stream)); + CUDA_SAFE_CALL_NO_SYNC(cudaMemcpy2DAsync(_array, _pitch, host_p, + cols*sizeof(numtyp), cols*sizeof(numtyp), rows, + cudaMemcpyHostToDevice,stream)); } private: @@ -416,9 +417,10 @@ /// Asynchronous Copy from Host inline void copy_from_host(const numtyp *host_p, cudaStream_t &stream) { - CUDA_SAFE_CALL(cudaMemcpyToArrayAsync(_array, 0, 0, host_p, - numel()*sizeof(numtyp), - cudaMemcpyHostToDevice,stream)); + CUDA_SAFE_CALL_NO_SYNC(cudaMemcpyToArrayAsync(_array, 0, 0, host_p, + numel()*sizeof(numtyp), + cudaMemcpyHostToDevice, + stream)); } /// Asynchronous Copy from Host @@ -426,9 +428,9 @@ * on device **/ inline void copy_2Dfrom_host(const numtyp *host_p, const size_t rows, const size_t cols, cudaStream_t &stream) { - CUDA_SAFE_CALL(cudaMemcpy2DToArrayAsync(_array, 0, 0, host_p, - cols*sizeof(numtyp), cols*sizeof(numtyp), rows, - cudaMemcpyHostToDevice,stream)); + CUDA_SAFE_CALL_NO_SYNC(cudaMemcpy2DToArrayAsync(_array, 0, 0, host_p, + cols*sizeof(numtyp), cols*sizeof(numtyp), rows, + cudaMemcpyHostToDevice,stream)); } /// Cast buffer to numtyp in host_write and copy to array diff -Naur lammps-16Nov09/src/GPU/pair_gayberne_gpu.cpp lammps-17Nov09/src/GPU/pair_gayberne_gpu.cpp --- lammps-16Nov09/src/GPU/pair_gayberne_gpu.cpp 2009-08-12 10:46:29.000000000 -0600 +++ lammps-17Nov09/src/GPU/pair_gayberne_gpu.cpp 2009-11-16 14:43:47.000000000 -0700 @@ -43,18 +43,19 @@ // External functions from cuda library for atom decomposition -int * gb_gpu_init(int &ij_size, const int ntypes, const double gamma, - const double upsilon, const double mu, double **shape, - double **well, double **cutsq, double **sigma, - double **epsilon, double *host_lshape, int **form, - double **host_lj1, double **host_lj2, double **host_lj3, - double **host_lj4, double **offset, double *special_lj, - const int max_nbors, const int thread, const int gpu_id); +bool gb_gpu_init(int &ij_size, const int ntypes, const double gamma, + const double upsilon, const double mu, double **shape, + double **well, double **cutsq, double **sigma, + double **epsilon, double *host_lshape, int **form, + double **host_lj1, double **host_lj2, double **host_lj3, + double **host_lj4, double **offset, double *special_lj, + const int max_nbors, const int thread, const int gpu_id); void gb_gpu_clear(const int thread); int * gb_gpu_reset_nbors(const int nall, const int nlocal, const int inum, int *ilist, const int *numj, const int *type, const int thread, bool &success); -void gb_gpu_nbors(const int num_ij, const bool eflag, const int thread); +void gb_gpu_nbors(const int *ij, const int num_ij, const bool eflag, + const int thread); void gb_gpu_atom(double **host_x, double **host_quat, const int *host_type, const bool rebuild, const int thread); void gb_gpu_gayberne(const bool eflag, const bool vflag, const bool rebuild, @@ -86,27 +87,23 @@ PairGayBerneGPU::~PairGayBerneGPU() { - if (comm->me == 0 && screen) { - printf("\n\n-------------------------------------"); - printf("--------------------------------\n"); - printf(" GPU Time Stamps: "); - printf("\n-------------------------------------"); - printf("--------------------------------\n"); - gb_gpu_time(my_thread); - printf("Procs: %d\n",comm->nprocs); - printf("-------------------------------------"); - printf("--------------------------------\n\n"); - } + printf("\n\n-------------------------------------"); + printf("--------------------------------\n"); + printf(" GPU Time Stamps: "); + printf("\n-------------------------------------"); + printf("--------------------------------\n"); + gb_gpu_time(my_thread); + printf("-------------------------------------"); + printf("--------------------------------\n\n"); + #pragma omp parallel { #ifdef GB_GPU_OMP int my_thread=omp_get_thread_num(); #endif gb_gpu_clear(my_thread); - if (ij_new[my_thread]!=NULL) { - ij_new[my_thread]=NULL; + if (ij_new[my_thread]!=NULL) delete [] ij_new[my_thread]; - } } } @@ -187,17 +184,14 @@ num_ij++; if (num_ij==ij_size) { - memcpy(ij[my_thread],ij_new[my_thread],num_ij*sizeof(int)); - gb_gpu_nbors(num_ij,eflag,my_thread); + gb_gpu_nbors(ij_new[my_thread],num_ij,eflag,my_thread); ijp=ij_new[my_thread]; num_ij=0; } } } - if (num_ij>0) { - memcpy(ij[my_thread],ij_new[my_thread],num_ij*sizeof(int)); - gb_gpu_nbors(num_ij,eflag,my_thread); - } + if (num_ij>0) + gb_gpu_nbors(ij_new[my_thread],num_ij,eflag,my_thread); } gb_gpu_gayberne(eflag,vflag,rebuild,my_thread); @@ -325,11 +319,11 @@ my_gpu+=multi_gpu_param; #endif - ij[my_thread]=gb_gpu_init(ij_size, atom->ntypes+1, gamma, upsilon, mu, - shape, well, cutsq, sigma, epsilon, lshape, form, - lj1, lj2, lj3, lj4, offset, force->special_lj, - neighbor->oneatom, my_thread, my_gpu); - if (ij[my_thread]==0) + bool init_ok=gb_gpu_init(ij_size, atom->ntypes+1, gamma, upsilon, mu, + shape, well, cutsq, sigma, epsilon, lshape, form, + lj1, lj2, lj3, lj4, offset, force->special_lj, + neighbor->oneatom, my_thread, my_gpu); + if (!init_ok) error->one("AT LEAST ONE PROCESS COULD NOT ALLOCATE A CUDA-ENABLED GPU."); if (ij_new[my_thread]!=NULL) diff -Naur lammps-16Nov09/src/GPU/pair_gayberne_gpu.h lammps-17Nov09/src/GPU/pair_gayberne_gpu.h --- lammps-16Nov09/src/GPU/pair_gayberne_gpu.h 2009-08-11 12:59:27.000000000 -0600 +++ lammps-17Nov09/src/GPU/pair_gayberne_gpu.h 2009-11-16 14:43:47.000000000 -0700 @@ -32,7 +32,7 @@ private: int ij_size; - int *ij[MAX_GPU_THREADS], *ij_new[MAX_GPU_THREADS], *olist[MAX_GPU_THREADS]; + int *ij_new[MAX_GPU_THREADS], *olist[MAX_GPU_THREADS]; int my_thread, nthreads, thread_inum[MAX_GPU_THREADS], omp_chunk; diff -Naur lammps-16Nov09/src/GPU/pair_lj_cut_gpu.cpp lammps-17Nov09/src/GPU/pair_lj_cut_gpu.cpp --- lammps-16Nov09/src/GPU/pair_lj_cut_gpu.cpp 2009-08-12 10:46:29.000000000 -0600 +++ lammps-17Nov09/src/GPU/pair_lj_cut_gpu.cpp 2009-11-16 14:43:47.000000000 -0700 @@ -39,15 +39,15 @@ // External functions from cuda library for force decomposition -int * lj_gpu_init(int &ij_size, const int ntypes, double **cutsq, - double **sigma, double **epsilon, double **host_lj1, - double **host_lj2, double **host_lj3, double **host_lj4, - double **offset, double *special_lj, const int max_nbors, - const int gpu_id); +bool lj_gpu_init(int &ij_size, const int ntypes, double **cutsq, + double **sigma, double **epsilon, double **host_lj1, + double **host_lj2, double **host_lj3, double **host_lj4, + double **offset, double *special_lj, const int max_nbors, + const int gpu_id); void lj_gpu_clear(); bool lj_gpu_reset_nbors(const int nall, const int inum, int *ilist, const int *numj); -void lj_gpu_nbors(const int num_ij); +void lj_gpu_nbors(const int *ij, const int num_ij); void lj_gpu_atom(double **host_x, const int *host_type, const bool rebuild); void lj_gpu(const bool eflag, const bool vflag, const bool rebuild); double lj_gpu_forces(double **f, const int *ilist, const bool eflag, @@ -75,22 +75,17 @@ PairLJCutGPU::~PairLJCutGPU() { - if (comm->me == 0 && screen) { - printf("\n\n-------------------------------------"); - printf("--------------------------------\n"); - printf(" GPU Time Stamps: "); - printf("\n-------------------------------------"); - printf("--------------------------------\n"); - lj_gpu_time(); - printf("Procs: %d\n",comm->nprocs); - printf("-------------------------------------"); - printf("--------------------------------\n\n"); - } + printf("\n\n-------------------------------------"); + printf("--------------------------------\n"); + printf(" GPU Time Stamps: "); + printf("\n-------------------------------------"); + printf("--------------------------------\n"); + lj_gpu_time(); + printf("-------------------------------------"); + printf("--------------------------------\n\n"); lj_gpu_clear(); - if (ij_new!=NULL) { - ij_new=NULL; + if (ij_new!=NULL) delete [] ij_new; - } } /* ---------------------------------------------------------------------- */ @@ -143,16 +138,14 @@ num_ij++; if (num_ij==ij_size) { - memcpy(ij,ij_new,num_ij*sizeof(int)); - lj_gpu_nbors(num_ij); + lj_gpu_nbors(ij_new, num_ij); ijp=ij_new; num_ij=0; } } } if (num_ij>0) { - memcpy(ij,ij_new,num_ij*sizeof(int)); - lj_gpu_nbors(num_ij); + lj_gpu_nbors(ij_new, num_ij); } } @@ -230,9 +223,8 @@ cutsq[i][j] = cutsq[j][i] = cut*cut; } - ij=lj_gpu_init(ij_size, atom->ntypes+1, cutsq, sigma, epsilon, lj1, lj2, lj3, - lj4, offset, force->special_lj, neighbor->oneatom, my_gpu); - if (ij==0) + if (!lj_gpu_init(ij_size, atom->ntypes+1, cutsq, sigma, epsilon, lj1, lj2,lj3, + lj4, offset, force->special_lj, neighbor->oneatom, my_gpu)) error->one("AT LEAST ONE PROCESS COULD NOT ALLOCATE A CUDA-ENABLED GPU."); if (ij_new!=NULL) diff -Naur lammps-16Nov09/src/GPU/pair_lj_cut_gpu.h lammps-17Nov09/src/GPU/pair_lj_cut_gpu.h --- lammps-16Nov09/src/GPU/pair_lj_cut_gpu.h 2009-08-11 12:59:27.000000000 -0600 +++ lammps-17Nov09/src/GPU/pair_lj_cut_gpu.h 2009-11-16 14:43:47.000000000 -0700 @@ -31,7 +31,7 @@ private: int ij_size; - int *ij, *ij_new; + int *ij_new; int last_neighbor, multi_gpu_mode, multi_gpu_param; }; diff -Naur lammps-16Nov09/src/USER-IMD/fix_imd.cpp lammps-17Nov09/src/USER-IMD/fix_imd.cpp --- lammps-16Nov09/src/USER-IMD/fix_imd.cpp 2009-11-06 08:38:50.000000000 -0700 +++ lammps-17Nov09/src/USER-IMD/fix_imd.cpp 2009-11-16 14:48:47.000000000 -0700 @@ -234,6 +234,8 @@ /* default values for optional flags */ unwrap_flag = 0; + nowait_flag = 0; + connect_msg = 1; imd_fscale = 1.0; imd_trate = 1; @@ -246,6 +248,12 @@ } else { unwrap_flag = 0; } + } else if (0 == strcmp(arg[argsdone], "nowait")) { + if (0 == strcmp(arg[argsdone+1], "on")) { + nowait_flag = 1; + } else { + nowait_flag = 0; + } } else if (0 == strcmp(arg[argsdone], "fscale")) { imd_fscale = atof(arg[argsdone+1]); } else if (0 == strcmp(arg[argsdone], "trate")) { @@ -338,42 +346,45 @@ } /* ---------------------------------------------------------------------- */ -/* wait for IMD client (e.g. VMD) to respond, initialize communication - * buffers and collect tag/id maps. */ -void FixIMD::setup(int) -{ - /* nme: number of atoms in group on this MPI task - * nmax: max number of atoms in group across all MPI tasks - * nlocal: all local atoms - */ - int i,j; - int nmax,nme,nlocal; - int *mask = atom->mask; - int *tag = atom->tag; - nlocal = atom->nlocal; - nme=0; - for (i=0; i < nlocal; ++i) - if (mask[i] & groupbit) ++nme; - - MPI_Allreduce(&nme,&nmax,1,MPI_INT,MPI_MAX,world); - maxbuf = nmax*size_one; - comm_buf = memory->smalloc(maxbuf,"imd:comm_buf"); +/* (re-)connect to an IMD client (e.g. VMD). return 1 if + new connection was made, 0 if not. */ +int FixIMD::reconnect() +{ /* set up IMD communication. */ + imd_terminate = 0; + imd_inactive = 0; + if (me == 0) { - if (screen) - fprintf(screen,"Waiting for IMD connection on port %d.\n",imd_port); + if (screen && connect_msg) + if (nowait_flag) + fprintf(screen,"Listening for IMD connection on port %d.\n",imd_port); + else + fprintf(screen,"Waiting for IMD connection on port %d.\n",imd_port); - int retval=0; - do { - retval = imdsock_selread(localsock, 60); - } while (retval <= 0); - clientsock = imdsock_accept(localsock); - - if (!clientsock) { - if (screen) - fprintf(screen, "IMD socket accept error. Dropping connection.\n"); - imd_terminate = 1; + connect_msg = 0; + clientsock = NULL; + if (nowait_flag) { + int retval = imdsock_selread(localsock,0); + if (retval > 0) { + clientsock = imdsock_accept(localsock); + } else { + imd_inactive = 1; + return 0; + } + } else { + int retval=0; + do { + retval = imdsock_selread(localsock, 60); + } while (retval <= 0); + clientsock = imdsock_accept(localsock); + } + + if (!imd_inactive && !clientsock) { + if (screen) + fprintf(screen, "IMD socket accept error. Dropping connection.\n"); + imd_terminate = 1; + return 0; } else { /* check endianness and IMD protocol version. */ if (imd_handshake(clientsock)) { @@ -381,6 +392,7 @@ fprintf(screen, "IMD handshake error. Dropping connection.\n"); imdsock_destroy(clientsock); imd_terminate = 1; + return 0; } else { int32 length; if (imdsock_selread(clientsock, 1) != 1 || @@ -389,10 +401,41 @@ fprintf(screen, "Incompatible IMD client version? Dropping connection.\n"); imdsock_destroy(clientsock); imd_terminate = 1; + return 0; + } else { + return 1; } } } } + return 0; +} + +/* ---------------------------------------------------------------------- */ +/* wait for IMD client (e.g. VMD) to respond, initialize communication + * buffers and collect tag/id maps. */ +void FixIMD::setup(int) +{ + /* nme: number of atoms in group on this MPI task + * nmax: max number of atoms in group across all MPI tasks + * nlocal: all local atoms + */ + int i,j; + int nmax,nme,nlocal; + int *mask = atom->mask; + int *tag = atom->tag; + nlocal = atom->nlocal; + nme=0; + for (i=0; i < nlocal; ++i) + if (mask[i] & groupbit) ++nme; + + MPI_Allreduce(&nme,&nmax,1,MPI_INT,MPI_MAX,world); + maxbuf = nmax*size_one; + comm_buf = memory->smalloc(maxbuf,"imd:comm_buf"); + + connect_msg = 1; + reconnect(); + MPI_Bcast(&imd_inactive, 1, MPI_INT, 0, world); MPI_Bcast(&imd_terminate, 1, MPI_INT, 0, world); if (imd_terminate) error->all("LAMMPS terminated on error in setting up IMD connection."); @@ -464,7 +507,16 @@ * Send coodinates, energies, and add IMD forces to atoms. */ void FixIMD::post_force(int vflag) { - if (imd_inactive) return; /* IMD client has detached. do nothing. */ + /* check for reconnect */ + if (imd_inactive) { + reconnect(); + MPI_Bcast(&imd_inactive, 1, MPI_INT, 0, world); + MPI_Bcast(&imd_terminate, 1, MPI_INT, 0, world); + if (imd_terminate) + error->all("LAMMPS terminated on error in setting up IMD connection."); + if (imd_inactive) + return; /* IMD client has detached and not yet come back. do nothing. */ + } int *tag = atom->tag; double **x = atom->x; @@ -614,37 +666,11 @@ force_buf = NULL; imdsock_destroy(clientsock); clientsock = NULL; - if (screen) { + if (screen) fprintf(screen, "IMD client detached. LAMMPS run continues.\n"); - fprintf(screen, "Waiting for new IMD connection on port %d.\n",imd_port); - } - int retval=0; - do { - retval = imdsock_selread(localsock, 60); - } while (retval <= 0); - clientsock = imdsock_accept(localsock); - if (!clientsock) { - if (screen) - fprintf(screen, "IMD socket accept error. Dropping connection.\n"); - imd_terminate = 1; - } else { - /* check endianness and IMD protocol version. */ - if (imd_handshake(clientsock)) { - if (screen) - fprintf(screen, "IMD handshake error. Dropping connection.\n"); - imdsock_destroy(clientsock); - imd_terminate = 1; - } else { - int32 length; - if (imdsock_selread(clientsock, 1) != 1 || - imd_recv_header(clientsock, &length) != IMD_GO) { - if (screen) - fprintf(screen, "Incompatible IMD client version? Dropping connection.\n"); - imdsock_destroy(clientsock); - imd_terminate = 1; - } - } - } + + connect_msg = 1; + reconnect(); if (imd_terminate) imd_inactive = 1; break; } @@ -708,11 +734,11 @@ buf = static_cast(force_buf); /* compare data to hash table */ - for (int i=0; i < length; ++i) { - buf[i].tag = rev_idmap[imd_tags[i]]; - buf[i].x = imd_fdat[3*i]; - buf[i].y = imd_fdat[3*i+1]; - buf[i].z = imd_fdat[3*i+2]; + for (int ii=0; ii < length; ++ii) { + buf[ii].tag = rev_idmap[imd_tags[ii]]; + buf[ii].x = imd_fdat[3*ii]; + buf[ii].y = imd_fdat[3*ii+1]; + buf[ii].z = imd_fdat[3*ii+2]; } delete[] imd_tags; delete[] imd_fdat; diff -Naur lammps-16Nov09/src/USER-IMD/fix_imd.h lammps-17Nov09/src/USER-IMD/fix_imd.h --- lammps-16Nov09/src/USER-IMD/fix_imd.h 2009-11-06 08:38:50.000000000 -0700 +++ lammps-17Nov09/src/USER-IMD/fix_imd.h 2009-11-16 14:48:47.000000000 -0700 @@ -80,9 +80,13 @@ int imd_trate; // IMD transmission rate. int unwrap_flag; // true if coordinates need to be unwrapped before sending + int nowait_flag; // true if LAMMPS should not wait with the execution for VMD. + int connect_msg; // flag to indicate whether a "listen for connection message" is needed. int me; // my MPI rank in this "world". int nlevels_respa; // flag to determine respa levels. + + int reconnect(); }; }