Unverified commit dbccbe67 authored by guo ran, committed by GitHub

Refactor boxing sub_task_builder (#4159)


* refactor boxing_sub_task_builder

* refine

* refine

* refine

* refine

Co-authored-by: oneflow-ci-bot <69100618+oneflow-ci-bot@users.noreply.github.com>
parent 677723e9
Showing changed files with 443 additions and 442 deletions
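The core of the refactor is visible in the SubTskGphBuilder base class further down in this diff: Build no longer receives sorted CompTaskNode sources and destinations and wires the destinations itself; instead it consumes plain TaskNode inputs, returns its outputs (and control tasks) through out-parameters, and takes the producer's time shape explicitly. The two signatures below are copied from that hunk for reference:

// Old interface (removed): builders connected the destination CompTaskNodes themselves.
virtual Maybe<SubTskGphBuilderStatus> Build(
    SubTskGphBuilderCtx* ctx, const std::vector<CompTaskNode*>& sorted_src_comp_tasks,
    const std::vector<CompTaskNode*>& sorted_dst_comp_tasks,
    const ParallelDesc& src_parallel_desc, const ParallelDesc& dst_parallel_desc,
    const LogicalBlobId& lbi, const BlobDesc& logical_blob_desc,
    const SbpParallel& src_sbp_parallel, const SbpParallel& dst_sbp_parallel) const = 0;

// New interface (added): outputs go to *sorted_out_tasks, control tasks go to
// *sorted_ctrl_tasks, and the time shape is passed in explicitly.
virtual Maybe<SubTskGphBuilderStatus> Build(
    SubTskGphBuilderCtx* ctx, const std::vector<TaskNode*>& sorted_in_tasks,
    std::vector<TaskNode*>* sorted_out_tasks,
    std::vector<std::vector<TaskNode*>>* sorted_ctrl_tasks, const ParallelDesc& in_parallel_desc,
    const ParallelDesc& out_parallel_desc, const LogicalBlobId& lbi,
    const BlobDesc& logical_blob_desc, const SbpParallel& in_sbp_parallel,
    const SbpParallel& out_sbp_parallel, const Shape& time_shape) const = 0;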
@@ -29,8 +29,8 @@ void NaiveActor::VirtualAsyncSendNaiveProducedRegstMsgToConsumer() {
REGISTER_ACTOR(TaskType::kSliceBoxing, NaiveActor);
REGISTER_ACTOR(TaskType::kBoxingIdentity, NaiveActor);
REGISTER_ACTOR(TaskType::kBoxingS2SAll2AllPack, NaiveActor);
REGISTER_ACTOR(TaskType::kBoxingS2SAll2AllUnpack, NaiveActor);
REGISTER_ACTOR(TaskType::kCollectiveBoxingPack, NaiveActor);
REGISTER_ACTOR(TaskType::kCollectiveBoxingUnpack, NaiveActor);
REGISTER_ACTOR(TaskType::kDecodeH2D, NaiveActor);
} // namespace oneflow
@@ -19,24 +19,22 @@ limitations under the License.
namespace oneflow {
Maybe<SubTskGphBuilderStatus> B21SubTskGphBuilder::Build(
SubTskGphBuilderCtx* ctx, const std::vector<CompTaskNode*>& sorted_src_comp_tasks,
const std::vector<CompTaskNode*>& sorted_dst_comp_tasks, const ParallelDesc& src_parallel_desc,
const ParallelDesc& dst_parallel_desc, const LogicalBlobId& lbi,
const BlobDesc& logical_blob_desc, const SbpParallel& src_sbp_parallel,
const SbpParallel& dst_sbp_parallel) const {
if ((src_parallel_desc.parallel_num() == 1 || src_sbp_parallel.has_broadcast_parallel())
&& dst_parallel_desc.parallel_num() == 1) {
CompTaskNode* dst_node = sorted_dst_comp_tasks.front();
CompTaskNode* nearest_src_node =
SubTskGphBuilderUtil::FindNearestNode(sorted_src_comp_tasks, dst_node);
CHECK_NOTNULL(nearest_src_node);
TaskNode* proxy = ctx->GetProxyNode(nearest_src_node, nearest_src_node->MemZoneId121(),
dst_node->machine_id(), dst_node->MemZoneId121());
Connect<TaskNode>(proxy, ctx->task_graph()->NewEdge(), dst_node);
return TRY(BuildSubTskGphBuilderStatus(sorted_src_comp_tasks.front(),
sorted_dst_comp_tasks.front(), src_parallel_desc,
dst_parallel_desc, src_sbp_parallel, dst_sbp_parallel,
lbi, logical_blob_desc, "B21SubTskGphBuilder", ""));
SubTskGphBuilderCtx* ctx, const std::vector<TaskNode*>& sorted_in_tasks,
std::vector<TaskNode*>* sorted_out_tasks,
std::vector<std::vector<TaskNode*>>* sorted_ctrl_tasks, const ParallelDesc& in_parallel_desc,
const ParallelDesc& out_parallel_desc, const LogicalBlobId& lbi,
const BlobDesc& logical_blob_desc, const SbpParallel& in_sbp_parallel,
const SbpParallel& out_sbp_parallel, const Shape& time_shape) const {
if ((in_parallel_desc.parallel_num() == 1 || in_sbp_parallel.has_broadcast_parallel())
&& out_parallel_desc.parallel_num() == 1) {
const int64_t out_parallel_id = 0;
const int64_t nearest_in_parallel_id = SubTskGphBuilderUtil::FindNearestSrcParallelId(
in_parallel_desc, out_parallel_desc, out_parallel_id);
TaskNode* nearest_in_node = sorted_in_tasks.at(nearest_in_parallel_id);
TaskNode* proxy = ctx->GetProxyNode(nearest_in_node, nearest_in_node->MemZoneId121(),
out_parallel_desc, out_parallel_id);
sorted_out_tasks->push_back(proxy);
return TRY(BuildSubTskGphBuilderStatus("B21SubTskGphBuilder", ""));
} else {
return Error::BoxingNotSupportedError();
}
@@ -26,14 +26,13 @@ class B21SubTskGphBuilder final : public SubTskGphBuilder {
B21SubTskGphBuilder() = default;
~B21SubTskGphBuilder() override = default;
Maybe<SubTskGphBuilderStatus> Build(SubTskGphBuilderCtx* ctx,
const std::vector<CompTaskNode*>& sorted_src_comp_tasks,
const std::vector<CompTaskNode*>& sorted_dst_comp_tasks,
const ParallelDesc& src_parallel_desc,
const ParallelDesc& dst_parallel_desc,
const LogicalBlobId& lbi, const BlobDesc& logical_blob_desc,
const SbpParallel& src_sbp_parallel,
const SbpParallel& dst_sbp_parallel) const override;
Maybe<SubTskGphBuilderStatus> Build(
SubTskGphBuilderCtx* ctx, const std::vector<TaskNode*>& sorted_in_tasks,
std::vector<TaskNode*>* sorted_out_tasks,
std::vector<std::vector<TaskNode*>>* sorted_ctrl_tasks, const ParallelDesc& in_parallel_desc,
const ParallelDesc& out_parallel_desc, const LogicalBlobId& lbi,
const BlobDesc& logical_blob_desc, const SbpParallel& in_sbp_parallel,
const SbpParallel& out_sbp_parallel, const Shape& time_shape) const override;
};
} // namespace oneflow
@@ -58,17 +58,23 @@ std::string ShapeToString(const Shape& shape) {
return shape_ss.str();
}
std::string SubTskGphBuilderStatusToCsvLine(const SubTskGphBuilderStatus& status) {
std::string MakeBoxingLoggerCsvRow(const SubTskGphBuilderStatus& status,
const std::string& src_op_name, const std::string& dst_op_name,
const ParallelDesc& src_parallel_desc,
const ParallelDesc& dst_parallel_desc,
const SbpParallel& src_sbp_parallel,
const SbpParallel& dst_sbp_parallel, const LogicalBlobId& lbi,
const BlobDesc& logical_blob_desc) {
std::string serialized_status;
serialized_status += status.src_op_name() + ",";
serialized_status += status.dst_op_name() + ",";
serialized_status += ParallelDescToString(status.src_parallel_desc()) + ",";
serialized_status += ParallelDescToString(status.dst_parallel_desc()) + ",";
serialized_status += SbpParallelToString(status.src_sbp_parallel()) + ",";
serialized_status += SbpParallelToString(status.dst_sbp_parallel()) + ",";
serialized_status += GenLogicalBlobName(status.lbi()) + ",";
serialized_status += DataType_Name(status.logical_blob_desc().data_type()) + ",";
serialized_status += ShapeToString(status.logical_blob_desc().shape()) + ",";
serialized_status += src_op_name + ",";
serialized_status += dst_op_name + ",";
serialized_status += ParallelDescToString(src_parallel_desc) + ",";
serialized_status += ParallelDescToString(dst_parallel_desc) + ",";
serialized_status += SbpParallelToString(src_sbp_parallel) + ",";
serialized_status += SbpParallelToString(dst_sbp_parallel) + ",";
serialized_status += GenLogicalBlobName(lbi) + ",";
serialized_status += DataType_Name(logical_blob_desc.data_type()) + ",";
serialized_status += ShapeToString(logical_blob_desc.shape()) + ",";
serialized_status += status.builder_name() + ",";
if (status.comment().empty()) {
serialized_status += "-";
@@ -88,8 +94,14 @@ CsvBoxingLogger::CsvBoxingLogger(std::string path) {
CsvBoxingLogger::~CsvBoxingLogger() { log_stream_->Flush(); }
void CsvBoxingLogger::Log(const SubTskGphBuilderStatus& status) {
log_stream_ << SubTskGphBuilderStatusToCsvLine(status);
void CsvBoxingLogger::Log(const SubTskGphBuilderStatus& status, const std::string& src_op_name,
const std::string& dst_op_name, const ParallelDesc& src_parallel_desc,
const ParallelDesc& dst_parallel_desc,
const SbpParallel& src_sbp_parallel, const SbpParallel& dst_sbp_parallel,
const LogicalBlobId& lbi, const BlobDesc& logical_blob_desc) {
log_stream_ << MakeBoxingLoggerCsvRow(status, src_op_name, dst_op_name, src_parallel_desc,
dst_parallel_desc, src_sbp_parallel, dst_sbp_parallel, lbi,
logical_blob_desc);
}
} // namespace oneflow
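Because SubTskGphBuilderStatus now only records the builder name and a comment, the caller supplies the boxing metadata when logging. A minimal call sketch, assuming the caller already holds these values (the variable names here are illustrative, not part of this diff):

// boxing_logger is assumed to be a BoxingLogger* owned by the caller.
boxing_logger->Log(status, src_op_name, dst_op_name, src_parallel_desc, dst_parallel_desc,
                   src_sbp_parallel, dst_sbp_parallel, lbi, logical_blob_desc);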
@@ -27,7 +27,11 @@ class BoxingLogger {
BoxingLogger() = default;
virtual ~BoxingLogger() = default;
virtual void Log(const SubTskGphBuilderStatus& status) = 0;
virtual void Log(const SubTskGphBuilderStatus& status, const std::string& src_op_name,
const std::string& dst_op_name, const ParallelDesc& src_parallel_desc,
const ParallelDesc& dst_parallel_desc, const SbpParallel& src_sbp_parallel,
const SbpParallel& dst_sbp_parallel, const LogicalBlobId& lbi,
const BlobDesc& logical_blob_desc) = 0;
};
class NullBoxingLogger final : public BoxingLogger {
@@ -36,7 +40,11 @@ class NullBoxingLogger final : public BoxingLogger {
NullBoxingLogger() = default;
~NullBoxingLogger() override = default;
void Log(const SubTskGphBuilderStatus& status) override{};
void Log(const SubTskGphBuilderStatus& status, const std::string& src_op_name,
const std::string& dst_op_name, const ParallelDesc& src_parallel_desc,
const ParallelDesc& dst_parallel_desc, const SbpParallel& src_sbp_parallel,
const SbpParallel& dst_sbp_parallel, const LogicalBlobId& lbi,
const BlobDesc& logical_blob_desc) override{};
};
class CsvBoxingLogger final : public BoxingLogger {
@@ -46,7 +54,11 @@ class CsvBoxingLogger final : public BoxingLogger {
CsvBoxingLogger(std::string path);
~CsvBoxingLogger() override;
void Log(const SubTskGphBuilderStatus& status) override;
void Log(const SubTskGphBuilderStatus& status, const std::string& src_op_name,
const std::string& dst_op_name, const ParallelDesc& src_parallel_desc,
const ParallelDesc& dst_parallel_desc, const SbpParallel& src_sbp_parallel,
const SbpParallel& dst_sbp_parallel, const LogicalBlobId& lbi,
const BlobDesc& logical_blob_desc) override;
private:
std::unique_ptr<TeePersistentLogStream> log_stream_;
@@ -19,15 +19,16 @@ limitations under the License.
namespace oneflow {
Maybe<SubTskGphBuilderStatus> ChainSubTskGphBuilder::Build(
SubTskGphBuilderCtx* ctx, const std::vector<CompTaskNode*>& sorted_src_comp_tasks,
const std::vector<CompTaskNode*>& sorted_dst_comp_tasks, const ParallelDesc& src_parallel_desc,
const ParallelDesc& dst_parallel_desc, const LogicalBlobId& lbi,
const BlobDesc& logical_blob_desc, const SbpParallel& src_sbp_parallel,
const SbpParallel& dst_sbp_parallel) const {
SubTskGphBuilderCtx* ctx, const std::vector<TaskNode*>& sorted_in_tasks,
std::vector<TaskNode*>* sorted_out_tasks,
std::vector<std::vector<TaskNode*>>* sorted_ctrl_tasks, const ParallelDesc& in_parallel_desc,
const ParallelDesc& out_parallel_desc, const LogicalBlobId& lbi,
const BlobDesc& logical_blob_desc, const SbpParallel& in_sbp_parallel,
const SbpParallel& out_sbp_parallel, const Shape& time_shape) const {
for (const auto& builder : builders_) {
Maybe<SubTskGphBuilderStatus> boxing_builder_status = TRY(builder->Build(
ctx, sorted_src_comp_tasks, sorted_dst_comp_tasks, src_parallel_desc, dst_parallel_desc,
lbi, logical_blob_desc, src_sbp_parallel, dst_sbp_parallel));
ctx, sorted_in_tasks, sorted_out_tasks, sorted_ctrl_tasks, in_parallel_desc,
out_parallel_desc, lbi, logical_blob_desc, in_sbp_parallel, out_sbp_parallel, time_shape));
if (!boxing_builder_status.IsOk()
&& SubTskGphBuilderUtil::IsErrorBoxingNotSupported(*boxing_builder_status.error())) {
continue;
@@ -27,14 +27,13 @@ class ChainSubTskGphBuilder final : public SubTskGphBuilder {
: builders_(std::move(builders)) {}
~ChainSubTskGphBuilder() override = default;
Maybe<SubTskGphBuilderStatus> Build(SubTskGphBuilderCtx* ctx,
const std::vector<CompTaskNode*>& sorted_src_comp_tasks,
const std::vector<CompTaskNode*>& sorted_dst_comp_tasks,
const ParallelDesc& src_parallel_desc,
const ParallelDesc& dst_parallel_desc,
const LogicalBlobId& lbi, const BlobDesc& logical_blob_desc,
const SbpParallel& src_sbp_parallel,
const SbpParallel& dst_sbp_parallel) const override;
Maybe<SubTskGphBuilderStatus> Build(
SubTskGphBuilderCtx* ctx, const std::vector<TaskNode*>& sorted_in_tasks,
std::vector<TaskNode*>* sorted_out_tasks,
std::vector<std::vector<TaskNode*>>* sorted_ctrl_tasks, const ParallelDesc& in_parallel_desc,
const ParallelDesc& out_parallel_desc, const LogicalBlobId& lbi,
const BlobDesc& logical_blob_desc, const SbpParallel& in_sbp_parallel,
const SbpParallel& out_sbp_parallel, const Shape& time_shape) const override;
private:
std::vector<std::shared_ptr<SubTskGphBuilder>> builders_;
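ChainSubTskGphBuilder keeps its chain-of-responsibility behavior with the new signature: each sub-builder either handles the case or returns BoxingNotSupportedError, and the chain tries the next one. A hypothetical composition, purely for illustration (the real builder list and its order live in the caller, which is not part of this diff):

std::vector<std::shared_ptr<SubTskGphBuilder>> builders;
builders.emplace_back(new OneToOneSubTskGphBuilder());
builders.emplace_back(new B21SubTskGphBuilder());
builders.emplace_back(new NaiveB2BSubTskGphBuilder());
builders.emplace_back(new NaiveB2PSubTskGphBuilder());
builders.emplace_back(new CollectiveBoxingSubTskGphBuilder());
builders.emplace_back(new SliceBoxingSubTskGphBuilder());
auto chain = std::make_shared<ChainSubTskGphBuilder>(builders);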
@@ -26,14 +26,13 @@ class CollectiveBoxingSubTskGphBuilder final : public SubTskGphBuilder {
CollectiveBoxingSubTskGphBuilder();
~CollectiveBoxingSubTskGphBuilder() override = default;
Maybe<SubTskGphBuilderStatus> Build(SubTskGphBuilderCtx* ctx,
const std::vector<CompTaskNode*>& sorted_src_comp_tasks,
const std::vector<CompTaskNode*>& sorted_dst_comp_tasks,
const ParallelDesc& src_parallel_desc,
const ParallelDesc& dst_parallel_desc,
const LogicalBlobId& lbi, const BlobDesc& logical_blob_desc,
const SbpParallel& src_sbp_parallel,
const SbpParallel& dst_sbp_parallel) const override;
Maybe<SubTskGphBuilderStatus> Build(
SubTskGphBuilderCtx* ctx, const std::vector<TaskNode*>& sorted_in_tasks,
std::vector<TaskNode*>* sorted_out_tasks,
std::vector<std::vector<TaskNode*>>* sorted_ctrl_tasks, const ParallelDesc& in_parallel_desc,
const ParallelDesc& out_parallel_desc, const LogicalBlobId& lbi,
const BlobDesc& logical_blob_desc, const SbpParallel& in_sbp_parallel,
const SbpParallel& out_sbp_parallel, const Shape& time_shape) const override;
private:
std::unique_ptr<SubTskGphBuilder> chain_builder_;
@@ -19,26 +19,23 @@ limitations under the License.
namespace oneflow {
Maybe<SubTskGphBuilderStatus> NaiveB2BSubTskGphBuilder::Build(
SubTskGphBuilderCtx* ctx, const std::vector<CompTaskNode*>& sorted_src_comp_tasks,
const std::vector<CompTaskNode*>& sorted_dst_comp_tasks, const ParallelDesc& src_parallel_desc,
const ParallelDesc& dst_parallel_desc, const LogicalBlobId& lbi,
const BlobDesc& logical_blob_desc, const SbpParallel& src_sbp_parallel,
const SbpParallel& dst_sbp_parallel) const {
if ((src_parallel_desc.parallel_num() == 1 || src_sbp_parallel.has_broadcast_parallel())
&& (dst_parallel_desc.parallel_num() == 1 || dst_sbp_parallel.has_broadcast_parallel())) {
std::vector<CompTaskNode*> nearest_src_comp_tasks;
for (CompTaskNode* dst_node : sorted_dst_comp_tasks) {
CompTaskNode* nearest_src_node =
SubTskGphBuilderUtil::FindNearestNode(sorted_src_comp_tasks, dst_node);
CHECK_NOTNULL(nearest_src_node);
TaskNode* proxy = ctx->GetProxyNode(nearest_src_node, nearest_src_node->MemZoneId121(),
dst_node->machine_id(), dst_node->MemZoneId121());
Connect<TaskNode>(proxy, ctx->task_graph()->NewEdge(), dst_node);
SubTskGphBuilderCtx* ctx, const std::vector<TaskNode*>& sorted_in_tasks,
std::vector<TaskNode*>* sorted_out_tasks,
std::vector<std::vector<TaskNode*>>* sorted_ctrl_tasks, const ParallelDesc& in_parallel_desc,
const ParallelDesc& out_parallel_desc, const LogicalBlobId& lbi,
const BlobDesc& logical_blob_desc, const SbpParallel& in_sbp_parallel,
const SbpParallel& out_sbp_parallel, const Shape& time_shape) const {
if ((in_parallel_desc.parallel_num() == 1 || in_sbp_parallel.has_broadcast_parallel())
&& (out_parallel_desc.parallel_num() == 1 || out_sbp_parallel.has_broadcast_parallel())) {
FOR_RANGE(int64_t, out_id, 0, out_parallel_desc.parallel_num()) {
const int64_t nearest_in_parallel_id = SubTskGphBuilderUtil::FindNearestSrcParallelId(
in_parallel_desc, out_parallel_desc, out_id);
TaskNode* nearest_in_node = sorted_in_tasks.at(nearest_in_parallel_id);
TaskNode* proxy = ctx->GetProxyNode(nearest_in_node, nearest_in_node->MemZoneId121(),
out_parallel_desc, out_id);
sorted_out_tasks->push_back(proxy);
}
return TRY(BuildSubTskGphBuilderStatus(sorted_src_comp_tasks.front(),
sorted_dst_comp_tasks.front(), src_parallel_desc,
dst_parallel_desc, src_sbp_parallel, dst_sbp_parallel,
lbi, logical_blob_desc, "NaiveB2BSubTskGphBuilder", ""));
return TRY(BuildSubTskGphBuilderStatus("NaiveB2BSubTskGphBuilder", ""));
} else {
return Error::BoxingNotSupportedError();
}
@@ -26,14 +26,13 @@ class NaiveB2BSubTskGphBuilder final : public SubTskGphBuilder {
NaiveB2BSubTskGphBuilder() = default;
~NaiveB2BSubTskGphBuilder() override = default;
Maybe<SubTskGphBuilderStatus> Build(SubTskGphBuilderCtx* ctx,
const std::vector<CompTaskNode*>& sorted_src_comp_tasks,
const std::vector<CompTaskNode*>& sorted_dst_comp_tasks,
const ParallelDesc& src_parallel_desc,
const ParallelDesc& dst_parallel_desc,
const LogicalBlobId& lbi, const BlobDesc& logical_blob_desc,
const SbpParallel& src_sbp_parallel,
const SbpParallel& dst_sbp_parallel) const override;
Maybe<SubTskGphBuilderStatus> Build(
SubTskGphBuilderCtx* ctx, const std::vector<TaskNode*>& sorted_in_tasks,
std::vector<TaskNode*>* sorted_out_tasks,
std::vector<std::vector<TaskNode*>>* sorted_ctrl_tasks, const ParallelDesc& in_parallel_desc,
const ParallelDesc& out_parallel_desc, const LogicalBlobId& lbi,
const BlobDesc& logical_blob_desc, const SbpParallel& in_sbp_parallel,
const SbpParallel& out_sbp_parallel, const Shape& time_shape) const override;
};
} // namespace oneflow
@@ -20,51 +20,61 @@ limitations under the License.
namespace oneflow {
Maybe<SubTskGphBuilderStatus> NaiveB2PSubTskGphBuilder::Build(
SubTskGphBuilderCtx* ctx, const std::vector<CompTaskNode*>& sorted_src_comp_tasks,
const std::vector<CompTaskNode*>& sorted_dst_comp_tasks, const ParallelDesc& src_parallel_desc,
const ParallelDesc& dst_parallel_desc, const LogicalBlobId& lbi,
const BlobDesc& logical_blob_desc, const SbpParallel& src_sbp_parallel,
const SbpParallel& dst_sbp_parallel) const {
if ((src_parallel_desc.parallel_num() == 1 || src_sbp_parallel.has_broadcast_parallel())
&& dst_parallel_desc.parallel_num() != 1 && dst_sbp_parallel.has_partial_sum_parallel()) {
HashMap<CompTaskNode*, CompTaskNode*> dst_node2nearest_src_node;
int64_t nearest_dst_node_idx = -1;
int64_t nearest_dst_node_distance = -1;
std::vector<CompTaskNode*> nearest_src_comp_tasks;
for (int64_t dst_node_idx = 0; dst_node_idx < sorted_dst_comp_tasks.size(); ++dst_node_idx) {
CompTaskNode* dst_node = sorted_dst_comp_tasks.at(dst_node_idx);
const int64_t nearest_src_node_idx =
SubTskGphBuilderUtil::FindNearestNodeIndex(sorted_src_comp_tasks, dst_node);
CHECK_NE_OR_RETURN(nearest_src_node_idx, -1);
CompTaskNode* nearest_src_node = sorted_src_comp_tasks.at(nearest_src_node_idx);
CHECK_OR_RETURN(dst_node2nearest_src_node.emplace(dst_node, nearest_src_node).second);
const int64_t distance = SubTskGphBuilderUtil::GetDistance(nearest_src_node, dst_node);
if (nearest_dst_node_idx == -1 || distance < nearest_dst_node_distance) {
nearest_dst_node_idx = dst_node_idx;
nearest_dst_node_distance = distance;
SubTskGphBuilderCtx* ctx, const std::vector<TaskNode*>& sorted_in_tasks,
std::vector<TaskNode*>* sorted_out_tasks,
std::vector<std::vector<TaskNode*>>* sorted_ctrl_tasks, const ParallelDesc& in_parallel_desc,
const ParallelDesc& out_parallel_desc, const LogicalBlobId& lbi,
const BlobDesc& logical_blob_desc, const SbpParallel& in_sbp_parallel,
const SbpParallel& out_sbp_parallel, const Shape& time_shape) const {
if ((in_parallel_desc.parallel_num() == 1 || in_sbp_parallel.has_broadcast_parallel())
&& out_parallel_desc.parallel_num() != 1 && out_sbp_parallel.has_partial_sum_parallel()) {
HashMap<int64_t, int64_t> out_id2nearest_in_id;
int64_t nearest_out_node_idx = -1;
int64_t nearest_out_node_distance = -1;
FOR_RANGE(int64_t, out_id, 0, out_parallel_desc.parallel_num()) {
const int64_t nearest_in_parallel_id = SubTskGphBuilderUtil::FindNearestSrcParallelId(
in_parallel_desc, out_parallel_desc, out_id);
out_id2nearest_in_id.emplace(out_id, nearest_in_parallel_id);
const int64_t distance = SubTskGphBuilderUtil::GetDistance(
in_parallel_desc, nearest_in_parallel_id, out_parallel_desc, out_id);
if (nearest_out_node_idx == -1 || distance < nearest_out_node_distance) {
nearest_out_node_idx = out_id;
nearest_out_node_distance = distance;
}
}
for (int64_t dst_node_idx = 0; dst_node_idx < sorted_dst_comp_tasks.size(); ++dst_node_idx) {
CompTaskNode* dst_node = sorted_dst_comp_tasks.at(dst_node_idx);
CompTaskNode* nearest_src_node = dst_node2nearest_src_node.at(dst_node);
if (dst_node_idx == nearest_dst_node_idx) {
TaskNode* proxy = ctx->GetProxyNode(nearest_src_node, nearest_src_node->MemZoneId121(),
dst_node->machine_id(), dst_node->MemZoneId121());
Connect<TaskNode>(proxy, ctx->task_graph()->NewEdge(), dst_node);
FOR_RANGE(int64_t, out_id, 0, out_parallel_desc.parallel_num()) {
const int64_t nearest_in_id = out_id2nearest_in_id.at(out_id);
TaskNode* nearest_in_node = sorted_in_tasks.at(nearest_in_id);
if (out_id == nearest_out_node_idx) {
TaskNode* proxy = ctx->GetProxyNode(nearest_in_node, nearest_in_node->MemZoneId121(),
out_parallel_desc, out_id);
sorted_out_tasks->push_back(proxy);
} else {
const int64_t out_machine_id = CHECK_JUST(out_parallel_desc.MachineId4ParallelId(out_id));
const int64_t out_dev_phy_id = CHECK_JUST(out_parallel_desc.DeviceId4ParallelId(out_id));
int64_t thrd_id;
if (out_parallel_desc.device_type() == DeviceType::kGPU) {
#ifdef WITH_CUDA
thrd_id = Global<IDMgr>::Get()->GetGpuComputeThrdId(out_dev_phy_id);
#else
UNIMPLEMENTED();
#endif
} else if (out_parallel_desc.device_type() == DeviceType::kCPU) {
thrd_id = Global<IDMgr>::Get()->PickCpuThrdIdEvenly(out_machine_id);
} else {
UNIMPLEMENTED();
}
auto* zeros_node = ctx->task_graph()->NewNode<BoxingZerosTaskNode>();
zeros_node->Init(dst_node->machine_id(), dst_node->thrd_id(), dst_node->area_id(), lbi,
logical_blob_desc.shape(), logical_blob_desc.data_type(),
*nearest_src_node->logical_node()->out_blob_time_shape());
nearest_src_node->BuildCtrlRegstDesc(zeros_node);
Connect<TaskNode>(nearest_src_node, ctx->task_graph()->NewEdge(), zeros_node);
Connect<TaskNode>(zeros_node, ctx->task_graph()->NewEdge(), dst_node);
zeros_node->Init(out_machine_id, thrd_id, NewAreaId(), lbi, logical_blob_desc.shape(),
logical_blob_desc.data_type(), time_shape);
nearest_in_node->BuildCtrlRegstDesc(zeros_node);
Connect<TaskNode>(nearest_in_node, ctx->task_graph()->NewEdge(), zeros_node);
sorted_out_tasks->push_back(zeros_node);
}
}
return TRY(BuildSubTskGphBuilderStatus(sorted_src_comp_tasks.front(),
sorted_dst_comp_tasks.front(), src_parallel_desc,
dst_parallel_desc, src_sbp_parallel, dst_sbp_parallel,
lbi, logical_blob_desc, "NaiveB2PSubTskGphBuilder", ""));
return TRY(BuildSubTskGphBuilderStatus("NaiveB2PSubTskGphBuilder", ""));
} else {
return Error::BoxingNotSupportedError();
}
@@ -26,14 +26,13 @@ class NaiveB2PSubTskGphBuilder final : public SubTskGphBuilder {
NaiveB2PSubTskGphBuilder() = default;
~NaiveB2PSubTskGphBuilder() override = default;
Maybe<SubTskGphBuilderStatus> Build(SubTskGphBuilderCtx* ctx,
const std::vector<CompTaskNode*>& sorted_src_comp_tasks,
const std::vector<CompTaskNode*>& sorted_dst_comp_tasks,
const ParallelDesc& src_parallel_desc,
const ParallelDesc& dst_parallel_desc,
const LogicalBlobId& lbi, const BlobDesc& logical_blob_desc,
const SbpParallel& src_sbp_parallel,
const SbpParallel& dst_sbp_parallel) const override;
Maybe<SubTskGphBuilderStatus> Build(
SubTskGphBuilderCtx* ctx, const std::vector<TaskNode*>& sorted_in_tasks,
std::vector<TaskNode*>* sorted_out_tasks,
std::vector<std::vector<TaskNode*>>* sorted_ctrl_tasks, const ParallelDesc& in_parallel_desc,
const ParallelDesc& out_parallel_desc, const LogicalBlobId& lbi,
const BlobDesc& logical_blob_desc, const SbpParallel& in_sbp_parallel,
const SbpParallel& out_sbp_parallel, const Shape& time_shape) const override;
};
} // namespace oneflow
@@ -19,26 +19,22 @@ limitations under the License.
namespace oneflow {
Maybe<SubTskGphBuilderStatus> OneToOneSubTskGphBuilder::Build(
SubTskGphBuilderCtx* ctx, const std::vector<CompTaskNode*>& sorted_src_comp_tasks,
const std::vector<CompTaskNode*>& sorted_dst_comp_tasks, const ParallelDesc& src_parallel_desc,
const ParallelDesc& dst_parallel_desc, const LogicalBlobId& lbi,
const BlobDesc& logical_blob_desc, const SbpParallel& src_sbp_parallel,
const SbpParallel& dst_sbp_parallel) const {
if ((src_parallel_desc.parallel_num() == 1 && dst_parallel_desc.parallel_num() == 1)
|| (src_parallel_desc.parallel_num() == dst_parallel_desc.parallel_num()
&& src_sbp_parallel == dst_sbp_parallel)) {
for (int64_t i = 0; i < src_parallel_desc.parallel_num(); ++i) {
CompTaskNode* src_node = sorted_src_comp_tasks.at(i);
CompTaskNode* dst_node = sorted_dst_comp_tasks.at(i);
SubTskGphBuilderCtx* ctx, const std::vector<TaskNode*>& sorted_in_tasks,
std::vector<TaskNode*>* sorted_out_tasks,
std::vector<std::vector<TaskNode*>>* sorted_ctrl_tasks, const ParallelDesc& in_parallel_desc,
const ParallelDesc& out_parallel_desc, const LogicalBlobId& lbi,
const BlobDesc& logical_blob_desc, const SbpParallel& in_sbp_parallel,
const SbpParallel& out_sbp_parallel, const Shape& time_shape) const {
if ((in_parallel_desc.parallel_num() == 1 && out_parallel_desc.parallel_num() == 1)
|| (in_parallel_desc.parallel_num() == out_parallel_desc.parallel_num()
&& in_sbp_parallel == out_sbp_parallel)) {
for (int64_t i = 0; i < in_parallel_desc.parallel_num(); ++i) {
TaskNode* in_node = sorted_in_tasks.at(i);
// TODO(liujuncheng): use lbi
TaskNode* proxy = ctx->GetProxyNode(src_node, src_node->MemZoneId121(),
dst_node->machine_id(), dst_node->MemZoneId121());
Connect<TaskNode>(proxy, ctx->task_graph()->NewEdge(), dst_node);
TaskNode* proxy = ctx->GetProxyNode(in_node, in_node->MemZoneId121(), out_parallel_desc, i);
sorted_out_tasks->push_back(proxy);
}
return TRY(BuildSubTskGphBuilderStatus(sorted_src_comp_tasks.front(),
sorted_dst_comp_tasks.front(), src_parallel_desc,
dst_parallel_desc, src_sbp_parallel, dst_sbp_parallel,
lbi, logical_blob_desc, "OneToOneSubTskGphBuilder", ""));
return TRY(BuildSubTskGphBuilderStatus("OneToOneSubTskGphBuilder", ""));
} else {
return Error::BoxingNotSupportedError();
}
@@ -26,14 +26,13 @@ class OneToOneSubTskGphBuilder final : public SubTskGphBuilder {
OneToOneSubTskGphBuilder() = default;
~OneToOneSubTskGphBuilder() override = default;
Maybe<SubTskGphBuilderStatus> Build(SubTskGphBuilderCtx* ctx,
const std::vector<CompTaskNode*>& sorted_src_comp_tasks,
const std::vector<CompTaskNode*>& sorted_dst_comp_tasks,
const ParallelDesc& src_parallel_desc,
const ParallelDesc& dst_parallel_desc,
const LogicalBlobId& lbi, const BlobDesc& logical_blob_desc,
const SbpParallel& src_sbp_parallel,
const SbpParallel& dst_sbp_parallel) const override;
Maybe<SubTskGphBuilderStatus> Build(
SubTskGphBuilderCtx* ctx, const std::vector<TaskNode*>& sorted_in_tasks,
std::vector<TaskNode*>* sorted_out_tasks,
std::vector<std::vector<TaskNode*>>* sorted_ctrl_tasks, const ParallelDesc& in_parallel_desc,
const ParallelDesc& out_parallel_desc, const LogicalBlobId& lbi,
const BlobDesc& logical_blob_desc, const SbpParallel& in_sbp_parallel,
const SbpParallel& out_sbp_parallel, const Shape& time_shape) const override;
};
} // namespace oneflow
@@ -54,33 +54,34 @@ bool IsSameDevice(const ParallelDesc& in_pd, const ParallelDesc& out_pd,
} // namespace
Maybe<SubTskGphBuilderStatus> SliceBoxingSubTskGphBuilder::Build(
SubTskGphBuilderCtx* ctx, const std::vector<CompTaskNode*>& sorted_src_comp_tasks,
const std::vector<CompTaskNode*>& sorted_dst_comp_tasks, const ParallelDesc& src_parallel_desc,
const ParallelDesc& dst_parallel_desc, const LogicalBlobId& lbi,
const BlobDesc& logical_blob_desc, const SbpParallel& src_sbp_parallel,
const SbpParallel& dst_sbp_parallel) const {
SubTskGphBuilderCtx* ctx, const std::vector<TaskNode*>& sorted_in_tasks,
std::vector<TaskNode*>* sorted_out_tasks,
std::vector<std::vector<TaskNode*>>* sorted_ctrl_tasks, const ParallelDesc& in_parallel_desc,
const ParallelDesc& out_parallel_desc, const LogicalBlobId& lbi,
const BlobDesc& logical_blob_desc, const SbpParallel& in_sbp_parallel,
const SbpParallel& out_sbp_parallel, const Shape& time_shape) const {
if (SubTskGphBuilderUtil::BlobHasDynamicShape(logical_blob_desc)) {
return Error::BoxingNotSupportedError();
}
if (!SubTskGphBuilderUtil::IsDeviceTypeCPUOrGPU(src_parallel_desc)) {
if (!SubTskGphBuilderUtil::IsDeviceTypeCPUOrGPU(in_parallel_desc)) {
return Error::BoxingNotSupportedError();
}
if (!SubTskGphBuilderUtil::IsDeviceTypeCPUOrGPU(dst_parallel_desc)) {
if (!SubTskGphBuilderUtil::IsDeviceTypeCPUOrGPU(out_parallel_desc)) {
return Error::BoxingNotSupportedError();
}
if (SubTskGphBuilderUtil::HasEmptySliceIfSplit(src_parallel_desc.parallel_num(), src_sbp_parallel,
if (SubTskGphBuilderUtil::HasEmptySliceIfSplit(in_parallel_desc.parallel_num(), in_sbp_parallel,
logical_blob_desc)) {
return Error::BoxingNotSupportedError();
}
if (SubTskGphBuilderUtil::HasEmptySliceIfSplit(dst_parallel_desc.parallel_num(), dst_sbp_parallel,
if (SubTskGphBuilderUtil::HasEmptySliceIfSplit(out_parallel_desc.parallel_num(), out_sbp_parallel,
logical_blob_desc)) {
return Error::BoxingNotSupportedError();
}
if (!(SubTskGphBuilderUtil::IsBoxingS2B(src_sbp_parallel, dst_sbp_parallel)
|| SubTskGphBuilderUtil::IsBoxingS2S(src_sbp_parallel, dst_sbp_parallel)
|| SubTskGphBuilderUtil::IsBoxingP2S(src_sbp_parallel, dst_sbp_parallel)
|| SubTskGphBuilderUtil::IsBoxingP2B(src_sbp_parallel, dst_sbp_parallel)
|| SubTskGphBuilderUtil::IsBoxingB2S(src_sbp_parallel, dst_sbp_parallel))) {
if (!(SubTskGphBuilderUtil::IsBoxingS2B(in_sbp_parallel, out_sbp_parallel)
|| SubTskGphBuilderUtil::IsBoxingS2S(in_sbp_parallel, out_sbp_parallel)
|| SubTskGphBuilderUtil::IsBoxingP2S(in_sbp_parallel, out_sbp_parallel)
|| SubTskGphBuilderUtil::IsBoxingP2B(in_sbp_parallel, out_sbp_parallel)
|| SubTskGphBuilderUtil::IsBoxingB2S(in_sbp_parallel, out_sbp_parallel))) {
return Error::BoxingNotSupportedError();
}
const auto GetBoxingGpuThrdId = [](const int64_t dev_id, CudaWorkType work_type) -> int64_t {
@@ -419,8 +420,7 @@ Maybe<SubTskGphBuilderStatus> SliceBoxingSubTskGphBuilder::Build(
};
const auto BuildSubTaskGphB2S =
[&ctx, &lbi, &CreateBoxingNode121, &CreateBoxingNodeToHost, &GetBoxingGpuThrdId, &NewEdge,
&sorted_src_comp_tasks, &sorted_dst_comp_tasks](
[&ctx, &lbi, &CreateBoxingNode121, &CreateBoxingNodeToHost, &GetBoxingGpuThrdId, &NewEdge](
const ParallelDesc& in_pd, const ParallelDesc& out_pd, const SbpParallel& in_sbp,
const SbpParallel& out_sbp, const BlobDesc& blob_desc,
const std::vector<TaskNode*>& in_nodes, std::vector<TaskNode*>* out_nodes) {
@@ -432,39 +432,36 @@ Maybe<SubTskGphBuilderStatus> SliceBoxingSubTskGphBuilder::Build(
CHECK(!ContainsEmptySlice(out_slices));
FOR_RANGE(int64_t, out_id, 0, out_pd.parallel_num()) {
const TensorSliceView& out_slice = out_slices.at(out_id);
CompTaskNode* dst_node = sorted_dst_comp_tasks.at(out_id);
const int64_t nearest_idx =
SubTskGphBuilderUtil::FindNearestNodeIndex(sorted_src_comp_tasks, dst_node);
CompTaskNode* src_node = sorted_src_comp_tasks.at(nearest_idx);
SubTskGphBuilderUtil::FindNearestSrcParallelId(in_pd, out_pd, out_id);
TaskNode* in_node = in_nodes.at(nearest_idx);
SliceBoxingTaskNode* slice_node =
CreateBoxingNode121(in_pd, nearest_idx, out_slice, kSliceBoxingTaskModeCopy);
slice_node->ConnectToSrcNodeWithSlice(src_node, NewEdge(), in_slice);
TaskNode* out_node = ctx->GetProxyNode(slice_node, slice_node->MemZoneId121(),
dst_node->machine_id(), dst_node->MemZoneId121());
slice_node->ConnectToSrcNodeWithSlice(in_node, NewEdge(), in_slice);
TaskNode* out_node =
ctx->GetProxyNode(slice_node, slice_node->MemZoneId121(), out_pd, out_id);
out_nodes->push_back(out_node);
}
};
std::vector<TaskNode*> in_nodes;
in_nodes.assign(sorted_src_comp_tasks.begin(), sorted_src_comp_tasks.end());
std::vector<TaskNode*> out_nodes;
std::string comment;
if (SubTskGphBuilderUtil::IsBoxingS2B(src_sbp_parallel, dst_sbp_parallel)) {
BuildSubTaskGphS2B(src_parallel_desc, dst_parallel_desc, src_sbp_parallel, dst_sbp_parallel,
logical_blob_desc, in_nodes, &out_nodes);
if (SubTskGphBuilderUtil::IsBoxingS2B(in_sbp_parallel, out_sbp_parallel)) {
BuildSubTaskGphS2B(in_parallel_desc, out_parallel_desc, in_sbp_parallel, out_sbp_parallel,
logical_blob_desc, sorted_in_tasks, sorted_out_tasks);
comment = "BuildSubTaskGphS2B";
} else if (SubTskGphBuilderUtil::IsBoxingS2S(src_sbp_parallel, dst_sbp_parallel)) {
BuildSubTaskGphS2S(src_parallel_desc, dst_parallel_desc, src_sbp_parallel, dst_sbp_parallel,
logical_blob_desc, in_nodes, &out_nodes);
} else if (SubTskGphBuilderUtil::IsBoxingS2S(in_sbp_parallel, out_sbp_parallel)) {
BuildSubTaskGphS2S(in_parallel_desc, out_parallel_desc, in_sbp_parallel, out_sbp_parallel,
logical_blob_desc, sorted_in_tasks, sorted_out_tasks);
comment = "BuildSubTaskGphS2S";
} else if (SubTskGphBuilderUtil::IsBoxingP2S(src_sbp_parallel, dst_sbp_parallel)) {
BuildSubTaskGphP2S(src_parallel_desc, dst_parallel_desc, src_sbp_parallel, dst_sbp_parallel,
logical_blob_desc, in_nodes, &out_nodes);
} else if (SubTskGphBuilderUtil::IsBoxingP2S(in_sbp_parallel, out_sbp_parallel)) {
BuildSubTaskGphP2S(in_parallel_desc, out_parallel_desc, in_sbp_parallel, out_sbp_parallel,
logical_blob_desc, sorted_in_tasks, sorted_out_tasks);
comment = "BuildSubTaskGphP2S";
} else if (SubTskGphBuilderUtil::IsBoxingP2B(src_sbp_parallel, dst_sbp_parallel)) {
if (logical_blob_desc.shape().elem_cnt() < dst_parallel_desc.parallel_num()) {
BuildSubTaskGphP2B(src_parallel_desc, dst_parallel_desc, src_sbp_parallel, dst_sbp_parallel,
logical_blob_desc, in_nodes, &out_nodes);
} else if (SubTskGphBuilderUtil::IsBoxingP2B(in_sbp_parallel, out_sbp_parallel)) {
if (logical_blob_desc.shape().elem_cnt() < out_parallel_desc.parallel_num()) {
BuildSubTaskGphP2B(in_parallel_desc, out_parallel_desc, in_sbp_parallel, out_sbp_parallel,
logical_blob_desc, sorted_in_tasks, sorted_out_tasks);
comment = "BuildSubTaskGphP2B";
} else {
BlobDesc flat_blob_desc(logical_blob_desc.data_type());
@@ -472,30 +469,26 @@ Maybe<SubTskGphBuilderStatus> SliceBoxingSubTskGphBuilder::Build(
std::vector<TaskNode*> middle_nodes;
SbpParallel middle_sbp;
middle_sbp.mutable_split_parallel()->set_axis(0);
BuildSubTaskGphP2S(src_parallel_desc, dst_parallel_desc, src_sbp_parallel, middle_sbp,
flat_blob_desc, in_nodes, &middle_nodes);
BuildSubTaskGphS2B(dst_parallel_desc, dst_parallel_desc, middle_sbp, dst_sbp_parallel,
flat_blob_desc, middle_nodes, &out_nodes);
BuildSubTaskGphP2S(in_parallel_desc, out_parallel_desc, in_sbp_parallel, middle_sbp,
flat_blob_desc, sorted_in_tasks, &middle_nodes);
BuildSubTaskGphS2B(out_parallel_desc, out_parallel_desc, middle_sbp, out_sbp_parallel,
flat_blob_desc, middle_nodes, sorted_out_tasks);
comment = "BuildSubTaskGphP2S->BuildSubTaskGphS2B";
for (TaskNode* out_node : out_nodes) {
for (TaskNode* out_node : *sorted_out_tasks) {
auto* slice_boxing_node = dynamic_cast<SliceBoxingTaskNode*>(out_node);
CHECK_NOTNULL(slice_boxing_node);
slice_boxing_node->SetOutShape(logical_blob_desc.shape());
}
}
} else if (SubTskGphBuilderUtil::IsBoxingB2S(src_sbp_parallel, dst_sbp_parallel)) {
BuildSubTaskGphB2S(src_parallel_desc, dst_parallel_desc, src_sbp_parallel, dst_sbp_parallel,
logical_blob_desc, in_nodes, &out_nodes);
} else if (SubTskGphBuilderUtil::IsBoxingB2S(in_sbp_parallel, out_sbp_parallel)) {
BuildSubTaskGphB2S(in_parallel_desc, out_parallel_desc, in_sbp_parallel, out_sbp_parallel,
logical_blob_desc, sorted_in_tasks, sorted_out_tasks);
comment = "BuildSubTaskGphB2S";
} else {
UNIMPLEMENTED();
}
ctx->ConnectAll121(out_nodes, sorted_dst_comp_tasks);
return TRY(BuildSubTskGphBuilderStatus(
sorted_src_comp_tasks.front(), sorted_dst_comp_tasks.front(), src_parallel_desc,
dst_parallel_desc, src_sbp_parallel, dst_sbp_parallel, lbi, logical_blob_desc,
"SliceBoxingSubTskGphBuilder", comment));
return TRY(BuildSubTskGphBuilderStatus("SliceBoxingSubTskGphBuilder", comment));
}
} // namespace oneflow
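One design consequence at the tail of SliceBoxingSubTskGphBuilder::Build: the builder no longer wires its outputs to the downstream tasks, since they are handed back through *sorted_out_tasks, and the returned status carries only the builder name and comment. The contrast, copied from the hunk above:

// Removed: destination wiring done inside the builder.
ctx->ConnectAll121(out_nodes, sorted_dst_comp_tasks);
// Kept: a slim status; the caller now owns the wiring and the logging metadata.
return TRY(BuildSubTskGphBuilderStatus("SliceBoxingSubTskGphBuilder", comment));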
@@ -26,14 +26,13 @@ class SliceBoxingSubTskGphBuilder final : public SubTskGphBuilder {
SliceBoxingSubTskGphBuilder() = default;
~SliceBoxingSubTskGphBuilder() override = default;
Maybe<SubTskGphBuilderStatus> Build(SubTskGphBuilderCtx* ctx,
const std::vector<CompTaskNode*>& sorted_src_comp_tasks,
const std::vector<CompTaskNode*>& sorted_dst_comp_tasks,
const ParallelDesc& src_parallel_desc,
const ParallelDesc& dst_parallel_desc,
const LogicalBlobId& lbi, const BlobDesc& logical_blob_desc,
const SbpParallel& src_sbp_parallel,
const SbpParallel& dst_sbp_parallel) const override;
Maybe<SubTskGphBuilderStatus> Build(
SubTskGphBuilderCtx* ctx, const std::vector<TaskNode*>& sorted_in_tasks,
std::vector<TaskNode*>* sorted_out_tasks,
std::vector<std::vector<TaskNode*>>* sorted_ctrl_tasks, const ParallelDesc& in_parallel_desc,
const ParallelDesc& out_parallel_desc, const LogicalBlobId& lbi,
const BlobDesc& logical_blob_desc, const SbpParallel& in_sbp_parallel,
const SbpParallel& out_sbp_parallel, const Shape& time_shape) const override;
};
} // namespace oneflow
@@ -29,11 +29,12 @@ class SubTskGphBuilder {
virtual ~SubTskGphBuilder() = default;
virtual Maybe<SubTskGphBuilderStatus> Build(
SubTskGphBuilderCtx* ctx, const std::vector<CompTaskNode*>& sorted_src_comp_tasks,
const std::vector<CompTaskNode*>& sorted_dst_comp_tasks,
const ParallelDesc& src_parallel_desc, const ParallelDesc& dst_parallel_desc,
const LogicalBlobId& lbi, const BlobDesc& logical_blob_desc,
const SbpParallel& src_sbp_parallel, const SbpParallel& dst_sbp_parallel) const = 0;
SubTskGphBuilderCtx* ctx, const std::vector<TaskNode*>& sorted_in_tasks,
std::vector<TaskNode*>* sorted_out_tasks,
std::vector<std::vector<TaskNode*>>* sorted_ctrl_tasks, const ParallelDesc& in_parallel_desc,
const ParallelDesc& out_parallel_desc, const LogicalBlobId& lbi,
const BlobDesc& logical_blob_desc, const SbpParallel& in_sbp_parallel,
const SbpParallel& out_sbp_parallel, const Shape& time_shape) const = 0;
};
} // namespace oneflow
@@ -68,4 +68,23 @@ TaskNode* SubTskGphBuilderCtx::GetProxyNode(TaskNode* src_node, int64_t src_mem_
}
}
TaskNode* SubTskGphBuilderCtx::GetProxyNode(TaskNode* src_node, const int64_t src_mem_zone_id,
const ParallelDesc& dst_parallel_desc,
const int64_t dst_parallel_id) {
const int64_t dst_machine_id =
CHECK_JUST(dst_parallel_desc.MachineId4ParallelId(dst_parallel_id));
int64_t dst_mem_zone_id;
const IDMgr* id_mgr = Global<IDMgr>::Get();
if (dst_parallel_desc.device_type() == DeviceType::kCPU) {
dst_mem_zone_id = id_mgr->CpuMemZoneId();
} else if (dst_parallel_desc.device_type() == DeviceType::kGPU) {
const int64_t dst_dev_phy_id =
CHECK_JUST(dst_parallel_desc.DeviceId4ParallelId(dst_parallel_id));
dst_mem_zone_id = id_mgr->GpuMemZoneId(dst_dev_phy_id);
} else {
UNIMPLEMENTED();
}
return GetProxyNode(src_node, src_mem_zone_id, dst_machine_id, dst_mem_zone_id);
}
} // namespace oneflow
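The new GetProxyNode overload resolves the destination machine id and memory zone from a ParallelDesc plus parallel id and forwards to the existing (machine_id, mem_zone_id) overload. That is what lets the builders above use the following pattern (taken verbatim from the NaiveB2B hunk):

TaskNode* proxy = ctx->GetProxyNode(nearest_in_node, nearest_in_node->MemZoneId121(),
                                    out_parallel_desc, out_id);
sorted_out_tasks->push_back(proxy);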
@@ -34,6 +34,8 @@ class SubTskGphBuilderCtx final {
virtual TaskGraph* task_graph();
TaskNode* GetProxyNode(TaskNode* src_node, int64_t src_mem_zone_id, int64_t dst_machine_id,
int64_t dst_mem_zone_id);
TaskNode* GetProxyNode(TaskNode* src_node, int64_t src_mem_zone_id,
const ParallelDesc& dst_parallel_desc, const int64_t dst_parallel_id);
template<typename T1, typename T2>
void ConnectAll121(const std::vector<T1*>& src_nodes, const std::vector<T2*>& dst_nodes) {
CHECK_EQ(src_nodes.size(), dst_nodes.size());