2、rIda,ldb,ldc,m,n,k,rowcomm,colcomm,iw(*)reala(lda,*),b(ldb,*),c(ldc,*),w(*)*integerlma,lka,lkb,lnb,lmc,Inc,ldw,ldwl*integernr,nc,rid,cid,ierr,res,arect,brect,nrbintegerroot,north,south,sta(mpi_status_size),i*callmpi_comm_size(colcomm,nr,ierr)callmpi_comm_rank(colcomm,rid,ierr)callmpi_comm_siz
3、e(rowcomm,nc,ierr)callmpi_comm_rank(rowcomm,cid,ierr)*lma=m/nrres=mod(m,nr)if(rid」t・res)lma=lma+1*lka=k/ncres=mod(k,nc)if(cid.It.res)lka=lka+1*lkb=Wnrres=mod(k,nr)if(rid.It.res)lkb=lkb+1*Inc=n/ncres=mod(n,nc)讦(cid」t.res)Inc=Inc+1lmc=lmalnb=Incldw=lma+1callmpi_allgather(lkb,1,mpi_integer,iw,1,
4、mpi_integer,&colcomm,ierr)nrb=iw(l)ldwl=ldbif(nF.ne.nc)returncallmpirect(Ida,lma,nrb,arect)callmpi_type_commit(arect,ierr)callmpirect(ldb,nrb,lnb,brect)callmpi_type_commit(brect,ierr)callwrapinita(a,Ida,lma,lka,rid,cid,nr,nc)callwrapinitb(b,ldb,lkb,lnb,rid,cid,nr,nc)callzeroc(c,ldc,lmc,Inc)no
5、rth=mod(nr+rid-1,nr)south=mod(rid+1,nr)root=0do100i=0,nr-1root=mod(rid+i,nr)callmcopy(a,Ida,w,ldw,lma,lka)callmpi_bcast(w,1,arect,root,rowcomm,ierr)k=root+1callsgemm(w,ldw,b,ldb,c,ldc,lma,iw(k),Inc)c=c+a*bif(i」t.nr-i)thencallmpi_sendrecv(b,1,brect,north,1,w,1,brect,&south,1、colcomm,sta,ierr)k
6、=mod(root+1,nr)+1callmcopy(w,ldwl,b,ldb,iw(k),lnb)endif100continuecallmpi_type_free(arect,ierr)callmpi_type_free(brect,ieiT)returnenddatamove.fprogrammainimplicitnoneinclude'mpif.h'*integercomm,np,iamintegerierrintegerm,n,sta(mpi_status_size),front,next*callmpibegin(comm,np,iam)print*「Hellowo
7、rld!onProc.iamfront=mod(iam-1+np,np)next=mod(iam+1,np)*easelm=iamgoto20if(mod(iam,2).eq.0・and.inm.ne.np・l)thencallmpi_recv(n,1,mpi_integer,iam+1,1,comm,sta,ierr)elseif(mod(iam,2).ne.0・and・iam.ne.0)thencallmpi_send(m,1,mpi_integer,iam-1,1,com