// Very small mass constant (1.0e-16).
// NOTE(review): presumably a threshold below which a mass is treated as zero;
// the use site is not visible in this excerpt, so confirm there.
17 #define TINY_MASS 1.0e-16
// Four numeric weights labelled alpha, beta, gamma and delta.
// NOTE(review): judging only by the macro names, they presumably weight the
// N^2, N, DOF and constant terms of a cost estimate
// (alpha*N^2 + beta*N + gamma*DOF + delta) - confirm against the code that
// uses them, which is not visible in this excerpt.
18 #define N_2_COEF 1.6 // alpha
19 #define N_COEF 1.0 // beta
20 #define DOF_COEF -1.0 // gamma
21 #define CONST_COEF 14.4 // delta
// Log stream constructed on the file "schedule.log"; the `logfile << ...`
// trace statements throughout this file write to it.
// NOTE(review): assumes `ofstream` is std::ofstream (so the file is opened by
// this constructor) - confirm against the includes/using-directives.
27 ofstream
logfile(
"schedule.log");
40 Joint* _last_joint,
const fVec& _proc_costs,
42 int _first_proc,
int _last_proc,
69 for(p_joint_list::const_iterator j=org_outer_joints.begin(); j!=org_outer_joints.end(); j++)
101 for(joint_list::const_iterator
i=org_internal_joints.begin();
i!=org_internal_joints.end();
i++)
154 for(
n=
this;
n;
n=
n->Parent())
164 logfile <<
"create goal (0)" << endl;
180 logfile <<
"create goal (1)" << endl;
193 logfile <<
"target_parent = null, schedule_depth = " <<
schedule_depth <<
", child_id = " << child_id << endl;
195 int _first_proc = 0, _last_proc =
max_procs;
224 if(_last_proc - _first_proc == 1)
228 Joint* next_last = 0;
232 for(joint_list::iterator
i=in_joints.begin();
i!=in_joints.end();
i++)
253 nodes =
new dpNode* [n_internal_joints];
254 joint_list::iterator j;
259 #if 1 // always generate two non-empty subchains
260 if(
n->internal_joints[0].size() == 0 ||
261 n->internal_joints[1].size() == 0)
272 return n_internal_joints;
281 logfile <<
"calc_astar_cost(" << flush;
290 logfile <<
" one process: " << tmp << endl;
307 logfile <<
" child 0: " << tmp <<
" [" << n_myprocs <<
"]" << endl;
321 logfile <<
" child 1: " << tmp <<
" [" << n_myprocs <<
"]" << endl;
330 logfile <<
" final = " <<
c << endl;
371 all_internal.clear();
382 if(!(*j)->ParentSide())
421 all_internal.push_back(cur);
441 all_internal.push_back(cur);
448 logfile <<
"=== calc_min_subchain_cost ===" << endl;
449 logfile <<
"outer = [" << flush;
451 for(p_joint_list::const_iterator j=_outer_joints.begin(); j!=_outer_joints.end(); j++)
453 logfile <<
" " << (*j)->GetJoint()->name << flush;
454 if(!(*j)->ParentSide())
462 int* n_outer_parent_side =
new int [
sim->
NumJoint()];
463 int* n_outer_child_side =
new int [
sim->
NumJoint()];
466 n_outer_parent_side[
i] = 0;
467 n_outer_child_side[
i] = 0;
469 for(p_joint_list::const_iterator j=_outer_joints.begin(); j!=_outer_joints.end(); j++)
471 if((*j)->ParentSide())
473 Joint* cur = (*j)->GetJoint();
476 n_outer_child_side[p->i_joint]++;
482 n_outer_parent_side[
i] = _outer_joints.size() - n_outer_child_side[
i];
486 added_joints.
clear();
497 for(joint_list::iterator j=sorted_child.begin(); j!=sorted_child.end(); j++)
499 if(n_outer_child_side[
c->i_joint] <= n_outer_child_side[(*j)->i_joint])
502 sorted_child.insert(j,
c);
506 if(!done) sorted_child.push_back(
c);
508 for(joint_list::iterator j=sorted_child.begin(); j!=sorted_child.end(); j++)
513 delete[] n_outer_parent_side;
514 delete[] n_outer_child_side;
// Linear membership scan: walks jlist from begin() to end() and returns true
// as soon as an element compares equal to jnt.
// NOTE(review): the enclosing signature is outside this excerpt; this is
// presumably the body of is_in(), which later code calls as
// is_in(added_joints, c) - confirm.
520 for(joint_list::const_iterator
i=jlist.begin();
i!=jlist.end();
i++)
522 if(*
i == jnt)
return true;
533 for(p_joint_list::const_iterator j=_outer_joints.begin(); j!=_outer_joints.end(); j++)
542 int my_outer = n_outer_child_side[cur->
i_joint];
543 if(n_outer_parent_side[cur->
i_joint] < n_outer_child_side[cur->
i_joint])
546 my_outer = n_outer_parent_side[cur->
i_joint];
555 for(joint_list::iterator j=sorted_child.begin(); j!=sorted_child.end(); j++)
557 if(n_outer_child_side[
c->i_joint] <= n_outer_child_side[(*j)->i_joint])
560 sorted_child.insert(j,
c);
564 if(!done) sorted_child.push_back(
c);
566 for(joint_list::iterator j=sorted_child.begin(); j!=sorted_child.end(); j++)
570 ret +=
add_to_parent_side(cur, added_joints, n_outer_parent_side, n_outer_child_side);
579 int n_child_outer = n_outer_child_side[
c->i_joint];
580 if(n_child_outer <= my_outer)
584 for(joint_list::iterator
i=smaller_child.begin();
i!=smaller_child.end();
i++)
586 int n = n_outer_child_side[(*i)->i_joint];
587 if(n_child_outer <=
n)
589 smaller_child.insert(
i,
c);
596 smaller_child.push_back(
c);
603 for(joint_list::iterator
i=larger_child.begin();
i!=larger_child.end();
i++)
605 int n = n_outer_child_side[(*i)->i_joint];
606 if(n_child_outer <=
n)
608 larger_child.insert(
i,
c);
615 larger_child.push_back(
c);
620 for(joint_list::iterator j=smaller_child.begin(); j!=smaller_child.end(); j++)
625 ret +=
add_to_child_side(cur, added_joints, n_outer_parent_side, n_outer_child_side);
627 for(joint_list::iterator j=larger_child.begin(); j!=larger_child.end(); j++)
640 int n_outer = n_outer_parent_side[cur->
i_joint];
641 int n_link_outer = 0;
644 if(!
is_in(added_joints,
c))
649 added_joints.push_back(cur);
651 logfile <<
" adding " << cur->
name <<
", n_outer = " << n_outer <<
", n_link_outer = " << n_link_outer <<
", cost = " << ret << endl;
658 int n_outer = n_outer_child_side[cur->
i_joint];
659 int n_link_outer = 0;
668 if(!
is_in(added_joints,
c) &&
c!=cur)
674 added_joints.push_back(cur);
676 logfile <<
" adding " << cur->
name <<
", n_outer = " << n_outer <<
", n_link_outer = " << n_link_outer <<
", cost = " << ret << endl;
700 joint_list::iterator
f;
704 for(
n=cur_leaf;
n;
n=
n->Parent())
724 joint_list::iterator
f;
728 for(
n=cur_leaf;
n;
n=
n->Parent())
754 all_vjoints.push_back(cur);
758 all_joints.push_back(cur);
767 logfile <<
"AutoSchedule(max_procs = " << _max_procs <<
")" << endl;
782 init_outer_joints.push_back(v_pjoints[0]);
783 init_outer_joints.push_back(v_pjoints[1]);
787 init_proc_costs.
zero();
795 logfile <<
"pSim::AutoSchedule: goal not found" << endl;
799 cerr <<
"goal found" << endl;
800 cerr <<
"cost = " << goal->
TotalCost() << endl;
802 cerr <<
"proc_costs = " <<
tran(proc_costs) << endl;
819 logfile <<
"joints = " << endl;
820 for(joint_list::iterator j=all_internal.begin(); j!=all_internal.end(); j++)
822 logfile <<
" " << count <<
" " << (*j)->name << endl;
827 logfile <<
"current count = " << count <<
", number of vjoints = " <<
all_vjoints.size() <<
", total n_joint = " <<
n_joint << endl;
828 logfile <<
"joints = " << endl;
829 for(
int i=0;
i<count;
i++)
836 logfile <<
"[" << count <<
"] = " << joints[count]->
name << endl;
849 _internal_joints_0.clear();
850 _internal_joints_1.clear();
851 joint_list::const_iterator j;
853 for(j=org_internal_joints.begin(); j!=org_internal_joints.end(); j++)
856 if(*j == _last_joint)
860 else if((*j)->isAscendant(_last_joint))
862 _internal_joints_0.push_back(*j);
867 _internal_joints_1.push_back(*j);
877 new_outer_joints.clear();
878 new_outer_joints.push_back(last_pjoint);
879 p_joint_list::const_iterator p;
880 int child_id = (last_pjoint->
ParentSide()) ? 1 : 0;
881 for(p=org_outer_joints.begin(); p!=org_outer_joints.end(); p++)
883 if((*p)->GetJoint()->isAscendant(last_pjoint->
GetJoint()))
886 new_outer_joints.push_back(*p);
888 else if(child_id == 1)
890 new_outer_joints.push_back(*p);
// Builds one committed MPI struct datatype per process in all_acc_types[].
// (The enclosing function header is outside this excerpt.)
//
// Allocate max_procs-sized tables: the resulting datatypes, plus the
// per-process displacement / element-type arrays and entry counters that
// create_types() fills in.
905 all_acc_types =
new MPI_Datatype [
max_procs];
907 MPI_Aint** disps =
new MPI_Aint* [
max_procs];
908 MPI_Datatype** oldtypes =
new MPI_Datatype* [
max_procs];
909 int* n_proc_joints =
new int [
max_procs];
// Per-process slot i: room for 3*n_joint entries, and an entry count of zero.
// (pSubChain::create_types appends 3 entries per call to the slot it writes.)
914 disps[
i] =
new MPI_Aint [3*
n_joint];
915 oldtypes[
i] =
new MPI_Datatype [3*
n_joint];
916 n_proc_joints[
i] = 0;
// Let the subchain tree fill the per-process tables.
918 subchains->create_types(n_proc_joints, lengths, disps, oldtypes);
// Turn the n_proc_joints[i] entries collected for process i into a struct
// datatype, commit it, and log the resulting handle.
921 MPI_Type_create_struct(n_proc_joints[
i], lengths[
i], disps[
i], oldtypes[
i], all_acc_types+
i);
922 MPI_Type_commit(all_acc_types+
i);
923 logfile <<
"[" << rank <<
"]: all_acc_types[" <<
i <<
"] = " << all_acc_types[
i] << endl;
// Release the temporary tables.
// NOTE(review): only these two delete[] calls are visible in this excerpt;
// confirm that disps[i], lengths[i] and the outer disps/oldtypes/lengths
// arrays are also released on the lines not shown.
930 delete[] oldtypes[
i];
935 delete[] n_proc_joints;
// Recursively hands a range of process ranks down the subchain tree.
//
// @param start_rank  first rank of the range given to this subchain
// @param end_rank    end of the range; the range size is computed below as
//                    end_rank - start_rank, i.e. end_rank itself is excluded
//
// Only part of the branching is visible in this excerpt: one condition
// involves `end_rank > start_rank+1` (more than one rank available); its other
// terms, the remaining branches and the return value are on lines not shown.
940 int pSubChain::assign_processes(
int start_rank,
int end_rank)
946 end_rank > start_rank+1)
// Split the range: children[0] gets the first n_my_procs/2 ranks (integer
// division), children[1] gets the remainder.
948 int n_my_procs = end_rank - start_rank;
949 int n_half_procs = n_my_procs/2;
950 children[0]->assign_processes(start_rank, start_rank+n_half_procs);
951 children[1]->assign_processes(start_rank+n_half_procs, end_rank);
// Non-split cases: a child is given the whole, undivided range.
// NOTE(review): the conditions guarding these two calls are not visible here.
958 children[0]->assign_processes(start_rank, end_rank);
962 children[1]->assign_processes(start_rank, end_rank);
// Creates and commits the MPI struct datatypes of this subchain
// (parent_lambda_type, parent_acc_type, parent_force_type), logs each handle,
// appends three MPI_DOUBLE entries to the per-rank tables passed in, and
// recurses into children[0].
//
// @param n_proc_joints  per-rank entry counters; n_proc_joints[rank] is read as
//                       the insertion index and then increased by 3
// @param _lengths       per-rank block-length arrays (written at index+1, index+2 here)
// @param _disps         per-rank address arrays filled via MPI_Get_address
// @param _oldtypes      per-rank element-type arrays (three MPI_DOUBLE entries written)
//
// The guarding conditions, the loops that fill lengths/disps, and the return
// value are on lines not shown in this excerpt.
968 int pSubChain::create_types(
int* n_proc_joints,
int** _lengths, MPI_Aint** _disps, MPI_Datatype** _oldtypes)
// --- parent_lambda_type: n_mat entries of MPI_DOUBLE ---
977 int* lengths =
new int [n_mat];
// NOTE(review): disps is declared MPI_Aint* and oldtypes MPI_Datatype*, yet
// both are allocated with `new int[...]`. That only compiles where those MPI
// types are typedefs of int, and it under-allocates wherever MPI_Aint is wider
// than int. Other code in this file allocates the same kind of arrays as
// `new MPI_Aint[...]` / `new MPI_Datatype[...]`; these should match.
978 MPI_Aint* disps =
new int [n_mat];
979 MPI_Datatype* oldtypes =
new int [n_mat];
985 oldtypes[count] = MPI_DOUBLE;
990 MPI_Type_create_struct(n_mat, lengths, disps, oldtypes, &parent_lambda_type);
991 MPI_Type_commit(&parent_lambda_type);
992 logfile <<
"[" <<
sim->rank <<
"]: parent_lambda_type = " << parent_lambda_type << endl;
// --- parent_acc_type: n_outer_joints entries of MPI_DOUBLE ---
997 oldtypes[
i] = MPI_DOUBLE;
1001 MPI_Type_create_struct(
n_outer_joints, lengths, disps, oldtypes, &parent_acc_type);
1002 MPI_Type_commit(&parent_acc_type);
1003 logfile <<
"[" <<
sim->rank <<
"]: parent_acc_type = " << parent_acc_type << endl;
// --- parent_force_type: arrays sized n_array, n_outer_joints+2 entries used ---
// NOTE(review): same `new int[...]` element-type mismatch for disps and
// oldtypes as in the allocation above. Also, no delete[] for any of these
// temporary arrays is visible in this excerpt - confirm they are released.
1011 int* lengths =
new int [n_array];
1012 MPI_Aint* disps =
new int [n_array];
1013 MPI_Datatype* oldtypes =
new int [n_array];
1018 oldtypes[
i] = MPI_DOUBLE;
1028 MPI_Type_create_struct(
n_outer_joints+2, lengths, disps, oldtypes, &parent_force_type);
1029 MPI_Type_commit(&parent_force_type);
1030 logfile <<
"[" <<
sim->rank <<
"]: parent_force_type = " << parent_force_type << endl;
// --- contribution to the per-rank tables ---
// Three consecutive MPI_DOUBLE entries are appended starting at the current
// count for `rank`.
1038 int index = n_proc_joints[rank];
1039 _oldtypes[rank][index] = MPI_DOUBLE;
1040 _oldtypes[rank][index+1] = MPI_DOUBLE;
1041 _oldtypes[rank][index+2] = MPI_DOUBLE;
// Entries index+1 and index+2 point at the f_final data of the two last
// pjoints, 6 doubles each. The address and length of the entry at `index` are
// set on lines not visible in this excerpt.
1043 MPI_Get_address(
last_pjoints[0]->f_final.data(), _disps[rank]+index+1);
1044 MPI_Get_address(
last_pjoints[1]->f_final.data(), _disps[rank]+index+2);
1046 _lengths[rank][index+1] = 6;
1047 _lengths[rank][index+2] = 6;
1048 n_proc_joints[rank] += 3;
// Continue down the tree so the child subchain appends its own entries.
1050 children[0]->create_types(n_proc_joints, _lengths, _disps, _oldtypes);
1076 if(j->
real)
return 0;
1348 cerr <<
"pSim::Schedule(joints): error- invalid joint order" << endl;
1355 for(
i=0;
i<_n_joints;
i++)
1357 cerr <<
"build_subchain_tree " << joints[
i]->
name << endl;
1368 int pj0_done =
false;
1369 int pj1_done =
false;
1372 subchain_list::iterator
i;
1380 if(!to_remove[0]) to_remove[0] = *
i;
1381 else if(!to_remove[1]) to_remove[1] = *
i;
1388 if(!to_remove[0]) to_remove[0] = *
i;
1389 else if(!to_remove[1]) to_remove[1] = *
i;
1392 if(!pj0_done && pj0->
plink)
1397 if(!pj1_done && pj1->
plink)
1402 if(to_remove[0])
buf.remove(to_remove[0]);
1403 if(to_remove[1])
buf.remove(to_remove[1]);
1404 buf.push_front(myp);
1410 if(sc->
links && sc->
links[0] == pl)
return 1;