Skip to content
Snippets Groups Projects
Commit 07ab09a5 authored by Shiyuan Shang-Guan's avatar Shiyuan Shang-Guan Committed by Li Xinqi
Browse files

Merge master to develop (part 3) (#1657)

* gpu (#1310)

* Fix snapshot (#1320)

* fix bug of snapshot

* refine distribute.sh

* use more accurate function calls

* rename function

* update for model parallel

* refine code

* feat: enhance cmake download & options (#1281)

* feat: enhance cmake download & options

* feat(tools/): add share libs build scripts

* fix: add cmake options

* feat: add 3rd party download

* chore: update README

* fix: fix protobuf & cmake repo

* fix: fix options name

* chore: merge 3rd_party.cmake & third_party.cmake

* chore: revert pre cmake URL fix

* chore: update ExternalProject check

* fix: fix typo & missing download

* fix: fix download url

* chore: update readme

* chore: fix typo

* fix: fix bugs

* fix: fix bugs

* fix: fix pre

* print all third party libs

* refine readme

* DOWNLOAD_THIRD_PARTY -> PRECOMPILED_THIRD_PARTY

* refine readme

* minor typo fix

* Fix bug in model parallel (#1345)

* fix conv in model parallel

* add TODO

* Fix bug in gcc54 (#1352)

* fix bug in gcc 5.4

* update

* refine ibverbs lib (#1391)

* refine link ibverbs lib

* modify minor

* fix a little bug in accuracy print (#1403)

* batch num for prediction (#1405)

* batch num for prediction

* !Train() => Predict()

* fix normalization epsilon check (#1433)

* Fix normalization epsilon check (#1441)

* fix normalization epsilon check

* remove check, fix epsilon value in op_conf

* align with tensorflow (#1461)

* Dev crop with random size (#1468)

* random size crop proto

* ImagePreprocessImpl::<kCropWithRandomSize>

* clang format

* MaxVal

* Dev jinyi offline build (#1476)

* chore: remove pre compiler funcs

* chore: add submodules

* fix: fix project build URL from git_url -> submodule_dir_url

* fix: fix submodule commit id

* fix: fix .gitmodules

* chore: mv third_party dir

* chore: remove test-driver(glog#188) link in glog submodule

* fix: update glog from: da816ea70645e463aa04f9564544939fa327d5a7 ==> to: 4f3e18bf26cdb794fc66cec348f57b5838a0c929

* chore: update README.md

* Dev prelu (#1474)

* GPU impl of prelu

* better naming

* address review

* renaming and use ? :

* address reviews on op

* change op conf

* rename weight

* allow 2d+

* not elementwise op

* naive impl

* minor fix

* rename

* remove dupl

* refactor to remove duplicate

* remove dup

* remove dup

* reverse condition

* move code

* remove useless arg

* refactoring

* add empty line

* fix jpeg encoder quality (#1450)

* fix(actor.cpp): never access regst after sending it to producer (#1531)

* fix(boxing_actor): not handle ctrl regst in NormalProcessNaiveReadableRegstMsg() (#1520)

* Dev center crop (#1542)

* center crop

* update

* add scalar_mul (#1553)

*  refactor(actor/*): update the {RegstNameType, {}} to std::make_pair (#1605)

* fix(boxing_actor): not handle ctrl regst in NormalProcessNaiveReadableRegstMsg()

* refactor(actor/*): update the {RegstNameType, {}} to std::make_pair

* fix record_num in blob (#1619)

* fix record_num in blob

* add comment
parent 74b47d57
No related branches found
No related tags found
No related merge requests found
Showing
with 496 additions and 16 deletions
......@@ -154,11 +154,11 @@ class Actor {
// NaiveOrCustomized
virtual std::pair<RegstNameType, HashSet<std::string>>
GetNaiveOrCustomizedConsumedRegstDescName() {
return {RegstNameType::kCustomized, {}};
return std::make_pair(RegstNameType::kCustomized, HashSet<std::string>{});
}
virtual std::pair<RegstNameType, HashSet<std::string>>
GetNaiveOrCustomizedProducedRegstDescName() {
return {RegstNameType::kCustomized, {}};
return std::make_pair(RegstNameType::kCustomized, HashSet<std::string>{});
}
void TakeOverNaiveConsumed(const PbMap<std::string, RegstDescIdSet>& consumed_ids);
void TakeOverNaiveProduced(const PbMap<std::string, RegstDescProto>& produced_ids);
......
......@@ -10,6 +10,7 @@ void BoxingActor::VirtualActorInit(const TaskProto& task_proto) {
}
void BoxingActor::NormalProcessNaiveReadableDataRegstMsg(const std::deque<Regst*>& rq) {
if (rq.back()->regst_desc()->regst_desc_type().has_data_regst_desc() == false) { return; }
if (rq.back()->packed_blob()->max_col_num() > 1 && col_id_order_ == ColIdOrder::kUnCertain) {
TrySetColIdOrder(rq.back());
}
......
......@@ -26,7 +26,7 @@ class CopyCommNetActor final : public Actor {
std::pair<RegstNameType, HashSet<std::string>> GetNaiveOrCustomizedConsumedRegstDescName()
override {
return {RegstNameType::kNaive, {}};
return std::make_pair(RegstNameType::kNaive, HashSet<std::string>{});
}
void ForEachCurCustomizedReadableRegst(std::function<void(const Regst*)>) const override;
void NormalProcessCustomizedEordMsg(const ActorMsg&) override { is_in_eord_ = true; }
......
......@@ -16,7 +16,7 @@ class DecodeRandomActor final : public CompActor {
void Act() override;
std::pair<RegstNameType, HashSet<std::string>> GetNaiveOrCustomizedConsumedRegstDescName()
override {
return {RegstNameType::kNaive, {}};
return std::make_pair(RegstNameType::kNaive, HashSet<std::string>{});
}
bool IsCustomizedReadReady() override;
bool IsCustomizedReadAlwaysUnReadyFromNow() override { return !IsCustomizedReadReady(); }
......
......@@ -36,7 +36,7 @@ class InputWiseCompActor : public CompActor {
void AsyncReturnAllCustomizedReadableRegst() override;
std::pair<RegstNameType, HashSet<std::string>> GetNaiveOrCustomizedConsumedRegstDescName()
override {
return {RegstNameType::kNaive, {}};
return std::make_pair(RegstNameType::kNaive, HashSet<std::string>{});
}
void VirtualAsyncSendNaiveProducedRegstMsgToConsumer() override;
void AsyncSendCustomizedConsumedRegstMsgToProducer() override;
......
......@@ -22,7 +22,7 @@ class NormalBackwardCompActor final : public CompActor {
void AsyncReturnAllCustomizedReadableRegst() override;
std::pair<RegstNameType, HashSet<std::string>> GetNaiveOrCustomizedConsumedRegstDescName()
override {
return {RegstNameType::kNaive, {"activation", "data_tmp", "out", "out_diff", "in"}};
return std::make_pair(RegstNameType::kNaive, HashSet<std::string>{"activation", "data_tmp", "out", "out_diff", "in"});
}
void VirtualAsyncSendNaiveProducedRegstMsgToConsumer() override;
void AsyncSendCustomizedConsumedRegstMsgToProducer() override;
......
......@@ -20,11 +20,11 @@ class NormalForwardCompActor final : public CompActor {
void AsyncReturnAllCustomizedReadableRegst() override;
std::pair<RegstNameType, HashSet<std::string>> GetNaiveOrCustomizedConsumedRegstDescName()
override {
return {RegstNameType::kNaive, {"in"}};
return std::make_pair(RegstNameType::kNaive, HashSet<std::string>{"in"});
}
std::pair<RegstNameType, HashSet<std::string>> GetNaiveOrCustomizedProducedRegstDescName()
override {
return {RegstNameType::kCustomized, {"const_buf"}};
return std::make_pair(RegstNameType::kCustomized, HashSet<std::string>{"const_buf"});
}
void AsyncSendCustomizedProducedRegstMsgToConsumer() override {}
void VirtualAsyncSendNaiveProducedRegstMsgToConsumer() override;
......
......@@ -16,7 +16,7 @@ class NormalMdUpdtCompActor final : public CompActor {
void Act() override;
std::pair<RegstNameType, HashSet<std::string>> GetNaiveOrCustomizedProducedRegstDescName()
override {
return {RegstNameType::kCustomized, {"const_model"}};
return std::make_pair(RegstNameType::kCustomized, HashSet<std::string>{"const_model"});
}
void AsyncSendCustomizedProducedRegstMsgToConsumer() override {}
void VirtualAsyncSendNaiveProducedRegstMsgToConsumer() override;
......
......@@ -17,7 +17,7 @@ class RecordLoadActor final : public CompActor {
void Act() override;
std::pair<RegstNameType, HashSet<std::string>> GetNaiveOrCustomizedConsumedRegstDescName()
override {
return {RegstNameType::kNaive, {}};
return std::make_pair(RegstNameType::kNaive, HashSet<std::string>{});
}
void VirtualAsyncSendNaiveProducedRegstMsgToConsumer() override;
bool IsCustomizedReadReady() override;
......
......@@ -173,6 +173,10 @@ message RecordLoadKernelConf {
required int64 device_piece_size = 1;
}
// Kernel-level configuration generated by PReluOp::VirtualGenKernelConf.
message PReluKernelConf {
// Axis permutation over the input blob's shape. It is the identity when
// alpha is channel-shared; otherwise it swaps the channel axis to the front.
// The GPU backward uses it to transpose buffers so per-channel alpha
// gradients can be reduced row-by-row.
repeated int32 perm = 1;
}
message KernelConf {
required OpAttribute op_attribute = 1;
required bool is_forward = 2;
......@@ -215,5 +219,6 @@ message KernelConf {
GatherKernelConf gather_conf = 406;
VariableKernelConf variable_conf = 407;
RecordLoadKernelConf record_load_conf = 408;
PReluKernelConf prelu_conf = 409;
}
}
#include "oneflow/core/kernel/prelu_kernel.h"
namespace oneflow {
// Forward pass: computes out = PReLU(in) using the learned "alpha" model blob.
template<DeviceType device_type, typename T>
void PReluKernel<device_type, T>::ForwardDataContent(
    const KernelCtx& ctx, std::function<Blob*(const std::string&)> BnInOp2Blob) const {
  const Blob* in_blob = BnInOp2Blob("in");
  const Blob* alpha_blob = BnInOp2Blob("alpha");
  Blob* out_blob = BnInOp2Blob("out");
  PReluKernelUtil<device_type, T>::Forward(ctx, this->op_conf().prelu_conf(), in_blob, alpha_blob,
                                           out_blob);
}
// Backward pass: writes d(in) into "in_diff" and the alpha gradient into
// "alpha_diff".
template<DeviceType device_type, typename T>
void PReluKernel<device_type, T>::BackwardDataContent(
const KernelCtx& ctx, std::function<Blob*(const std::string&)> BnInOp2Blob) const {
Blob* in_diff_blob = BnInOp2Blob("in_diff");
Blob* alpha_diff_blob = BnInOp2Blob("alpha_diff");
// NOTE(review): when "in_diff" is absent, the alpha gradient is skipped as
// well. The GPU util below uses in_diff as scratch space, so it cannot run
// without it -- confirm this is intended when only the model diff is needed.
if (in_diff_blob == nullptr) { return; }
// Zero-fill both outputs first: the CPU util accumulates alpha_diff with +=.
Memset<device_type>(ctx.device_ctx, in_diff_blob->mut_dptr<T>(), 0,
in_diff_blob->ByteSizeOfDataContentField());
Memset<device_type>(ctx.device_ctx, alpha_diff_blob->mut_dptr<T>(), 0,
alpha_diff_blob->ByteSizeOfDataContentField());
PReluKernelUtil<device_type, T>::Backward(
ctx, this->op_conf().prelu_conf(), this->kernel_conf().prelu_conf().perm(), BnInOp2Blob("in"),
BnInOp2Blob("alpha"), BnInOp2Blob("out_diff"), BnInOp2Blob("bw_buf"), in_diff_blob,
alpha_diff_blob);
}
template<typename T>
struct PReluKernelUtil<DeviceType::kCPU, T> {
static void Forward(const KernelCtx& ctx, const PReluOpConf& conf, const Blob* in_blob,
const Blob* alpha_blob, Blob* out_blob) {
const T* in_dptr = in_blob->dptr<T>();
const T* alpha_dptr = alpha_blob->dptr<T>();
T* out_dptr = out_blob->mut_dptr<T>();
const int64_t elem_cnt = in_blob->shape().elem_cnt();
if (conf.channel_shared()) {
FOR_RANGE(int64_t, i, 0, elem_cnt) {
out_dptr[i] = (in_dptr[i] >= 0) ? in_dptr[i] : in_dptr[i] * alpha_dptr[0];
}
} else {
if (conf.data_format() == "channels_first") {
const int64_t channel_num = in_blob->shape().At(1);
const int64_t area = in_blob->shape().Count(2);
FOR_RANGE(int64_t, i, 0, elem_cnt) {
int64_t c = (i / area) % channel_num;
out_dptr[i] = (in_dptr[i] >= 0) ? in_dptr[i] : in_dptr[i] * alpha_dptr[c];
}
} else if (conf.data_format() == "channels_last") {
const int64_t channel_num = in_blob->shape().At(in_blob->shape().NumAxes() - 1);
FOR_RANGE(int64_t, i, 0, elem_cnt) {
int64_t c = i % channel_num;
out_dptr[i] = (in_dptr[i] >= 0) ? in_dptr[i] : in_dptr[i] * alpha_dptr[c];
}
} else {
UNIMPLEMENTED();
}
}
}
static void Backward(const KernelCtx& ctx, const PReluOpConf& conf,
const PbRf<int32_t>& permutation, const Blob* in_blob,
const Blob* alpha_blob, const Blob* out_diff_blob, Blob* bw_buf_blob,
Blob* in_diff_blob, Blob* alpha_diff_blob) {
const T* in_dptr = in_blob->dptr<T>();
const T* alpha_dptr = alpha_blob->dptr<T>();
const T* out_diff_dptr = out_diff_blob->dptr<T>();
T* in_diff_dptr = in_diff_blob->mut_dptr<T>();
T* alpha_diff_dptr = alpha_diff_blob->mut_dptr<T>();
const int64_t elem_cnt = in_blob->shape().elem_cnt();
if (conf.data_format() == "channels_first") {
const int64_t channel_num = in_blob->shape().At(1);
const int64_t alpha_channel_num = conf.channel_shared() ? channel_num : 1;
const int64_t area = in_blob->shape().Count(2);
FOR_RANGE(int64_t, i, 0, elem_cnt) {
if (in_dptr[i] <= 0) {
int64_t c = (i / area) % channel_num / alpha_channel_num;
alpha_diff_dptr[c] += out_diff_dptr[i] * in_dptr[i];
}
if (in_dptr[i] > 0) {
in_diff_dptr[i] = out_diff_dptr[i];
} else {
int64_t c = (i / area) % channel_num / alpha_channel_num;
in_diff_dptr[i] = alpha_dptr[c] * out_diff_dptr[i];
}
}
} else if (conf.data_format() == "channels_last") {
const int64_t channel_num = in_blob->shape().At(in_blob->shape().NumAxes() - 1);
const int64_t alpha_channel_num = conf.channel_shared() ? channel_num : 1;
FOR_RANGE(int64_t, i, 0, elem_cnt) {
if (in_dptr[i] <= 0) {
int64_t c = i % channel_num / alpha_channel_num;
alpha_diff_dptr[c] += out_diff_dptr[i] * in_dptr[i];
}
if (in_dptr[i] > 0) {
in_diff_dptr[i] = out_diff_dptr[i];
} else {
int64_t c = i % channel_num / alpha_channel_num;
in_diff_dptr[i] = alpha_dptr[c] * out_diff_dptr[i];
}
}
} else {
UNIMPLEMENTED();
}
}
};
// Initializes the "alpha" model blob to the constant alpha_init from the op
// conf. random_seed_gen is unused: PReLU's alpha is constant-initialized.
template<DeviceType device_type, typename T>
void PReluKernel<device_type, T>::InitModelBlobsWithRandomSeed(
    DeviceCtx* ctx, std::mt19937* random_seed_gen,
    std::function<Blob*(const std::string&)> BnInOp2Blob) const {
  InitializerConf constant_initializer;
  constant_initializer.mutable_constant_conf()->set_value(
      this->op_conf().prelu_conf().alpha_init());
  KernelUtil<device_type, T>::InitializeWithProperConf(ctx, &constant_initializer, 0,
                                                       BnInOp2Blob("alpha"));
}
// Loads the "alpha" model blob from model_load_dir, sharded over
// part_id/part_num; the blob is treated as `channel_dim` rows of width 1.
template<DeviceType device_type, typename T>
void PReluKernel<device_type, T>::InitModelBlobsWithDir(
    DeviceCtx* ctx, int32_t part_id, int32_t part_num, const std::string& model_load_dir,
    std::function<Blob*(const std::string&)> BnInOp2Blob) const {
  Blob* alpha_blob = BnInOp2Blob("alpha");
  const int32_t channel_dim = alpha_blob->shape().At(0);
  KernelUtil<device_type, T>::InitializeWithDir(ctx, part_id, part_num, model_load_dir, alpha_blob,
                                                "alpha", channel_dim, 1);
}
ADD_DEFAULT_KERNEL_CREATOR(OperatorConf::kPreluConf, PReluKernel, FLOATING_DATA_TYPE_SEQ);
} // namespace oneflow
#include "oneflow/core/device/cuda_util.h"
#include "oneflow/core/kernel/kernel_util.h"
#include "oneflow/core/kernel/prelu_kernel.h"
#include "oneflow/core/kernel/kernel_util.cuh"
#include "oneflow/core/device/cuda_util.h"
#include <cub/cub.cuh>
namespace oneflow {
namespace {
// Elementwise PReLU: out[i] = in[i] for positive inputs, alpha[c] * in[i]
// otherwise. channel_num/area describe how a flat index maps to its channel c
// (both are 1 when alpha is channel-shared).
template<typename T>
__global__ void PReluForward(const int64_t elem_cnt, const int64_t channel_num, const int64_t area,
                             const T* in_dptr, const T* alpha_dptr, T* out_dptr) {
  CUDA_1D_KERNEL_LOOP(i, elem_cnt) {
    const T x = in_dptr[i];
    if (x > 0) {
      out_dptr[i] = x;
    } else {
      out_dptr[i] = x * alpha_dptr[(i / area) % channel_num];
    }
  }
}
// Input gradient: d(in)[i] = dy[i] for positive inputs, alpha[c] * dy[i]
// otherwise (c derived from channel_num/area as in PReluForward).
template<typename T>
__global__ void PReluDataBackward(const int64_t elem_cnt, const int64_t channel_num,
                                  const int64_t area, const T* in_dptr, const T* alpha_dptr,
                                  const T* out_diff_dptr, T* in_diff_dptr) {
  CUDA_1D_KERNEL_LOOP(i, elem_cnt) {
    const T dy = out_diff_dptr[i];
    if (in_dptr[i] > 0) {
      in_diff_dptr[i] = dy;
    } else {
      in_diff_dptr[i] = dy * alpha_dptr[(i / area) % channel_num];
    }
  }
}
// Per-element alpha-gradient contributions: x * dy where x <= 0, zero
// elsewhere. The buffer is reduced per channel afterwards (Sum / RowSum).
template<typename T>
__global__ void PReluAlphaBackward(const int64_t elem_cnt, const T* in_dptr, const T* out_diff_dptr,
                                   T* alpha_diff_buf_dptr) {
  CUDA_1D_KERNEL_LOOP(i, elem_cnt) {
    const bool is_nonpositive = (in_dptr[i] <= 0);
    alpha_diff_buf_dptr[i] = is_nonpositive ? out_diff_dptr[i] * in_dptr[i] : static_cast<T>(0);
  }
}
} // namespace
// GPU implementation of PReLU. Forward launches one elementwise kernel;
// Backward additionally reduces per-element alpha-gradient contributions per
// channel, reusing in_diff_blob and bw_buf_blob as scratch space.
template<typename T>
struct PReluKernelUtil<DeviceType::kGPU, T> {
static void Forward(const KernelCtx& ctx, const PReluOpConf& conf, const Blob* in_blob,
const Blob* alpha_blob, Blob* out_blob) {
const int64_t elem_cnt = in_blob->shape().elem_cnt();
if (conf.channel_shared()) {
// Shared alpha: channel_num = area = 1, so every element uses alpha[0].
PReluForward<<<BlocksNum4ThreadsNum(elem_cnt), kCudaThreadsNumPerBlock, 0,
ctx.device_ctx->cuda_stream()>>>(
elem_cnt, 1, 1, in_blob->dptr<T>(), alpha_blob->dptr<T>(), out_blob->mut_dptr<T>());
} else {
if (conf.data_format() == "channels_first") {
const int64_t channel_num = in_blob->shape().At(1);
const int64_t area = in_blob->shape().Count(2);
PReluForward<<<BlocksNum4ThreadsNum(elem_cnt), kCudaThreadsNumPerBlock, 0,
ctx.device_ctx->cuda_stream()>>>(elem_cnt, channel_num, area,
in_blob->dptr<T>(), alpha_blob->dptr<T>(),
out_blob->mut_dptr<T>());
} else if (conf.data_format() == "channels_last") {
// Channel is the innermost axis, so area = 1 and c = i % channel_num.
const int64_t channel_num = in_blob->shape().At(in_blob->shape().NumAxes() - 1);
PReluForward<<<BlocksNum4ThreadsNum(elem_cnt), kCudaThreadsNumPerBlock, 0,
ctx.device_ctx->cuda_stream()>>>(elem_cnt, channel_num, 1,
in_blob->dptr<T>(), alpha_blob->dptr<T>(),
out_blob->mut_dptr<T>());
} else {
UNIMPLEMENTED();
}
}
}
// Backward pipeline:
//   1. PReluAlphaBackward writes x * dy (or 0) per element into in_diff
//      (used purely as scratch at this point).
//   2. Those contributions are reduced into alpha_diff: a full Sum when
//      alpha is shared, otherwise a Transpose (via `permutation`, which
//      moves the channel axis to the front) followed by RowSum.
//   3. PReluDataBackward overwrites in_diff with the real input gradient.
static void Backward(const KernelCtx& ctx, const PReluOpConf& conf,
const PbRf<int32_t>& permutation, const Blob* in_blob,
const Blob* alpha_blob, const Blob* out_diff_blob, Blob* bw_buf_blob,
Blob* in_diff_blob, Blob* alpha_diff_blob) {
const int64_t elem_cnt = out_diff_blob->shape().elem_cnt();
// in_diff_blob acts as a scratch buffer here; it is rewritten in step 3.
PReluAlphaBackward<<<BlocksNum4ThreadsNum(elem_cnt), kCudaThreadsNumPerBlock, 0,
ctx.device_ctx->cuda_stream()>>>(
elem_cnt, in_blob->dptr<T>(), out_diff_blob->dptr<T>(), in_diff_blob->mut_dptr<T>());
if (conf.channel_shared()) {
// Single alpha: reduce all contributions into alpha_diff[0].
KernelUtil<DeviceType::kGPU, T>::Sum(
ctx.device_ctx, elem_cnt, in_diff_blob->dptr<T>(), alpha_diff_blob->mut_dptr<T>(),
bw_buf_blob->mut_dptr<T>(), bw_buf_blob->ByteSizeOfDataContentField());
PReluDataBackward<<<BlocksNum4ThreadsNum(elem_cnt), kCudaThreadsNumPerBlock, 0,
ctx.device_ctx->cuda_stream()>>>(
elem_cnt, 1, 1, in_blob->dptr<T>(), alpha_blob->dptr<T>(), out_diff_blob->dptr<T>(),
in_diff_blob->mut_dptr<T>());
} else {
// Transpose the scratch buffer channel-major so row c of bw_buf holds all
// contributions of channel c.
KernelUtil<DeviceType::kGPU, T>::Transpose(
ctx.device_ctx, in_diff_blob->shape().NumAxes(), in_diff_blob->shape(),
bw_buf_blob->shape(), permutation, in_diff_blob->shape().elem_cnt(),
in_diff_blob->dptr<T>(), bw_buf_blob->mut_dptr<T>());
CHECK_EQ(elem_cnt, bw_buf_blob->shape().elem_cnt());
if (conf.data_format() == "channels_first") {
const int64_t channel_num = out_diff_blob->shape().At(1);
CHECK_EQ(channel_num, bw_buf_blob->shape().At(0));
// RowSum reduces each channel row into one alpha_diff entry; in_diff is
// again lent out as temp storage before being overwritten in step 3.
KernelUtil<DeviceType::kGPU, T>::RowSum(
ctx.device_ctx, channel_num, bw_buf_blob->shape().Count(1), bw_buf_blob->dptr<T>(),
alpha_diff_blob->mut_dptr<T>(), in_diff_blob->mut_dptr<T>(),
in_diff_blob->ByteSizeOfDataContentField());
PReluDataBackward<<<BlocksNum4ThreadsNum(elem_cnt), kCudaThreadsNumPerBlock, 0,
ctx.device_ctx->cuda_stream()>>>(
elem_cnt, channel_num, out_diff_blob->shape().Count(2), in_blob->dptr<T>(),
alpha_blob->dptr<T>(), out_diff_blob->dptr<T>(), in_diff_blob->mut_dptr<T>());
} else if (conf.data_format() == "channels_last") {
const int64_t channel_num = out_diff_blob->shape().At(in_blob->shape().NumAxes() - 1);
CHECK_EQ(channel_num, bw_buf_blob->shape().At(0));
KernelUtil<DeviceType::kGPU, T>::RowSum(
ctx.device_ctx, channel_num, bw_buf_blob->shape().Count(1), bw_buf_blob->dptr<T>(),
alpha_diff_blob->mut_dptr<T>(), in_diff_blob->mut_dptr<T>(),
in_diff_blob->ByteSizeOfDataContentField());
PReluDataBackward<<<BlocksNum4ThreadsNum(elem_cnt), kCudaThreadsNumPerBlock, 0,
ctx.device_ctx->cuda_stream()>>>(
elem_cnt, channel_num, 1, in_blob->dptr<T>(), alpha_blob->dptr<T>(),
out_diff_blob->dptr<T>(), in_diff_blob->mut_dptr<T>());
} else {
UNIMPLEMENTED();
}
}
}
};
#define INSTANTIATE_P_RELU_KERNEL_UTIL(type_cpp, type_proto) \
template class PReluKernelUtil<DeviceType::kGPU, type_cpp>;
OF_PP_FOR_EACH_TUPLE(INSTANTIATE_P_RELU_KERNEL_UTIL, FLOATING_DATA_TYPE_SEQ);
} // namespace oneflow
#ifndef ONEFLOW_CORE_KERNEL_PRELU_KERNEL_H_
#define ONEFLOW_CORE_KERNEL_PRELU_KERNEL_H_
#include "oneflow/core/kernel/kernel.h"
namespace oneflow {
// PReLU (parametric ReLU) kernel: y = x for positive x, alpha * x otherwise,
// with "alpha" as a learnable model blob (per-channel or shared).
template<DeviceType device_type, typename T>
class PReluKernel final : public KernelIf<device_type> {
public:
OF_DISALLOW_COPY_AND_MOVE(PReluKernel);
PReluKernel() = default;
~PReluKernel() = default;
private:
void ForwardDataContent(const KernelCtx&,
std::function<Blob*(const std::string&)>) const override;
void BackwardDataContent(const KernelCtx&,
std::function<Blob*(const std::string&)>) const override;
// Constant-initializes "alpha" from alpha_init; random_seed_gen is unused.
void InitModelBlobsWithRandomSeed(
DeviceCtx*, std::mt19937* random_seed_gen,
std::function<Blob*(const std::string&)> BnInOp2Blob) const override;
// Loads "alpha" from a model snapshot directory (sharded by part_id/part_num).
void InitModelBlobsWithDir(DeviceCtx*, int32_t part_id, int32_t part_num,
const std::string& model_load_dir,
std::function<Blob*(const std::string&)> BnInOp2Blob) const override;
};
// Device-specialized elementwise math. `permutation` and `bw_buf_blob` are
// only used by the GPU specialization (channel-major transpose for the alpha
// gradient reduction).
template<DeviceType device_type, typename T>
struct PReluKernelUtil {
static void Forward(const KernelCtx& ctx, const PReluOpConf& conf, const Blob* in_blob,
const Blob* alpha_blob, Blob* out_blob);
static void Backward(const KernelCtx& ctx, const PReluOpConf& conf,
const PbRf<int32_t>& permutation, const Blob* in_blob,
const Blob* alpha_blob, const Blob* out_diff_blob, Blob* bw_buf_blob,
Blob* in_diff_blob, Blob* alpha_diff_blob);
};
} // namespace oneflow
#endif // ONEFLOW_CORE_KERNEL_PRELU_KERNEL_H_
......@@ -206,6 +206,14 @@ message ReluOpConf {
required string out = 2;
}
// Parametric ReLU: out = in for positive inputs, alpha * in otherwise.
message PReluOpConf {
// Input and output blob names.
required string in = 1;
required string out = 2;
// "channels_first" or "channels_last" (lower-cased by PReluOp::InitFromOpConf).
required string data_format = 3;
// If true, one alpha is shared by all channels; otherwise one per channel.
optional bool channel_shared = 4 [default = false];
// Constant used to initialize the "alpha" model blob.
optional float alpha_init = 5 [default = 0.25];
}
message SigmoidOpConf {
required string in = 1;
required string out = 2;
......@@ -1080,6 +1088,7 @@ message OperatorConf {
TopKOpConf top_k_conf = 262;
ParallelCastOpConf parallel_cast_conf = 263;
L2NormalizeOpConf l2_normalize_conf = 264;
PReluOpConf prelu_conf = 265;
// math op
BroadcastAddOpConf broadcast_add_conf = 500;
......
#include "oneflow/core/operator/prelu_op.h"
#include "oneflow/core/register/runtime_blob_desc.h"
namespace oneflow {
// Registers the op's blobs: input "in", output "out", learnable model blob
// "alpha", and (GPU only) the "bw_buf" scratch blob for the backward pass.
void PReluOp::InitFromOpConf() {
CHECK(op_conf().has_prelu_conf());
// Normalize data_format so case variants compare equal in later checks.
StrFieldTolower("data_format");
EnrollInputBn("in");
EnrollModelBn("alpha");
EnrollOutputBn("out");
// Only the GPU backward needs a transpose buffer.
if (device_type() == DeviceType::kGPU) { EnrollBwBufBn("bw_buf"); }
}
// Exposes the PReLU-specific sub-message of this op's OperatorConf.
const PbMessage& PReluOp::GetCustomizedConf() const { return op_conf().prelu_conf(); }
// "out" copies the descriptor of "in"; "alpha" is a rank-1 blob holding one
// coefficient per channel (or a single coefficient when channel_shared).
void PReluOp::InferBlobDescs(std::function<BlobDesc*(const std::string&)> GetBlobDesc4BnInOp,
                             const ParallelContext* parallel_ctx) const {
  const PReluOpConf& conf = op_conf().prelu_conf();
  const BlobDesc* in_desc = GetBlobDesc4BnInOp("in");
  *GetBlobDesc4BnInOp("out") = *in_desc;
  int64_t alpha_dim = 1;
  if (!conf.channel_shared()) {
    if (conf.data_format() == "channels_first") {
      alpha_dim = in_desc->shape().At(1);
    } else if (conf.data_format() == "channels_last") {
      alpha_dim = in_desc->shape().At(in_desc->shape().NumAxes() - 1);
    } else {
      UNIMPLEMENTED();
    }
  }
  BlobDesc* alpha_desc = GetBlobDesc4BnInOp("alpha");
  alpha_desc->mut_shape() = Shape({alpha_dim});
  alpha_desc->set_data_type(in_desc->data_type());
}
// Shapes the GPU-only "bw_buf" scratch blob used by the backward pass.
// When alpha is shared, the buffer just mirrors "in" (used for a full Sum).
// Otherwise it holds "in" with the channel axis swapped to the front -- the
// same axis swap produced by VirtualGenKernelConf's perm -- so the alpha
// gradient can be reduced row-by-row (one row per channel).
void PReluOp::InferBwBufBlobDescs(std::function<BlobDesc*(const std::string&)> GetBlobDesc4BnInOp,
const ParallelContext*) const {
if (device_type() == DeviceType::kGPU) {
BlobDesc* bw_buf_desc = GetBlobDesc4BnInOp("bw_buf");
if (op_conf().prelu_conf().channel_shared()) {
*bw_buf_desc = *GetBlobDesc4BnInOp("in");
} else {
const PReluOpConf& conf = op_conf().prelu_conf();
const BlobDesc* in_blob_desc = GetBlobDesc4BnInOp("in");
bw_buf_desc->set_data_type(in_blob_desc->data_type());
std::vector<int64_t> bw_buf_shape_vec = in_blob_desc->shape().dim_vec();
if (conf.data_format() == "channels_first") {
// (N, C, ...) -> (C, N, ...): swap axes 0 and 1.
bw_buf_shape_vec[0] = in_blob_desc->shape().At(1);
bw_buf_shape_vec[1] = in_blob_desc->shape().At(0);
bw_buf_desc->mut_shape() = Shape(bw_buf_shape_vec);
} else if (conf.data_format() == "channels_last") {
// (N, ..., C) -> (C, ..., N): swap first and last axes.
bw_buf_shape_vec[0] = in_blob_desc->shape().At(in_blob_desc->shape().NumAxes() - 1);
bw_buf_shape_vec[in_blob_desc->shape().NumAxes() - 1] = in_blob_desc->shape().At(0);
bw_buf_desc->mut_shape() = Shape(bw_buf_shape_vec);
} else {
UNIMPLEMENTED();
}
}
}
}
// Forces PReLU to run data-parallel (model splitting is disallowed for this
// op; see IsInputBlobAllowedModelSplit returning false).
void PReluOp::VirtualFixParallelDesc(ParallelDesc* pr_desc) const {
pr_desc->set_policy(ParallelPolicy::kDataParallel);
}
// Emits the axis permutation stored in PReluKernelConf.perm: the identity when
// alpha is channel-shared, otherwise the identity with the channel axis
// swapped to the front (used by the GPU backward to transpose buffers so
// elements are grouped by channel).
void PReluOp::VirtualGenKernelConf(
    std::function<const BlobDesc*(const std::string&)> GetBlobDesc4BnInOp,
    const ParallelContext* parallel_ctx, KernelConf* kernel_conf) const {
  const PReluOpConf& conf = op_conf().prelu_conf();
  const int64_t num_axes = GetBlobDesc4BnInOp("in")->shape().NumAxes();
  PbRf<int32_t>* perm = kernel_conf->mutable_prelu_conf()->mutable_perm();
  // Start from the identity permutation.
  FOR_RANGE(int64_t, i, 0, num_axes) { perm->Add(i); }
  if (conf.channel_shared()) { return; }
  if (conf.data_format() == "channels_first") {
    // (N, C, ...) -> (C, N, ...).
    (*perm)[0] = 1;
    (*perm)[1] = 0;
  } else if (conf.data_format() == "channels_last") {
    // (N, ..., C) -> (C, ..., N).
    (*perm)[0] = num_axes - 1;
    (*perm)[num_axes - 1] = 0;
  } else {
    UNIMPLEMENTED();
  }
}
REGISTER_OP(OperatorConf::kPreluConf, PReluOp);
} // namespace oneflow
#ifndef ONEFLOW_CORE_OPERATOR_PRELU_OP_H_
#define ONEFLOW_CORE_OPERATOR_PRELU_OP_H_
#include "oneflow/core/operator/operator.h"
namespace oneflow {
// PReLU (parametric ReLU) operator: y = x for positive x, alpha * x otherwise,
// with "alpha" enrolled as a learnable model blob.
class PReluOp final : public Operator {
public:
OF_DISALLOW_COPY_AND_MOVE(PReluOp);
PReluOp() = default;
~PReluOp() = default;
void InitFromOpConf() override;
const PbMessage& GetCustomizedConf() const override;
// Backward needs the forward input (for the sign of x) but not the output.
bool NeedInBlobWhenBackward() const override { return true; }
bool NeedOutBlobWhenBackward() const override { return false; }
void InferBlobDescs(std::function<BlobDesc*(const std::string&)> GetBlobDesc4BnInOp,
const ParallelContext* parallel_ctx) const override;
// Shapes "bw_buf", the GPU-only scratch blob for the alpha-gradient reduction.
void InferBwBufBlobDescs(std::function<BlobDesc*(const std::string&)> GetBlobDesc4BnInOp,
const ParallelContext*) const override;
void VirtualFixParallelDesc(ParallelDesc* pr_desc) const override;
private:
// Fills PReluKernelConf.perm (channel-to-front axis permutation).
void VirtualGenKernelConf(std::function<const BlobDesc*(const std::string&)> GetBlobDesc4BnInOp,
const ParallelContext*, KernelConf*) const override;
bool IsInputBlobAllowedModelSplit(const std::string& ibn) const override { return false; }
};
} // namespace oneflow
#endif // ONEFLOW_CORE_OPERATOR_PRELU_OP_H_
......@@ -17,6 +17,11 @@ message ImageCrop {
required int32 height = 5;
}
// Deterministic center crop: the largest centered rectangle matching this
// width:height aspect ratio is cropped, then resized to exactly this size.
message ImageCenterCrop {
// Target output size in pixels.
required int32 width = 1;
required int32 height = 2;
}
message ImageCropWithRandomSize {
message AspectRatioRange {
optional float min = 1 [default = 0.75];
......@@ -35,7 +40,8 @@ message ImagePreprocess {
oneof preprocess {
ImageResize resize = 1;
ImageCrop crop = 2;
ImageCropWithRandomSize crop_with_random_size = 3;
ImageMirror mirror = 4;
ImageCenterCrop center_crop = 3;
ImageCropWithRandomSize crop_with_random_size = 4;
ImageMirror mirror = 5;
}
}
......@@ -44,6 +44,36 @@ void ImagePreprocessImpl<PreprocessCase::kCrop>::DoPreprocess(
*image = (*image)(cv::Rect(x, y, width, height));
}
// Center-crops `image` to the largest centered sub-rectangle with the
// configured width:height aspect ratio, then resizes the crop to exactly
// width x height. NextRandomInt is unused: center cropping is deterministic.
void ImagePreprocessImpl<PreprocessCase::kCenterCrop>::DoPreprocess(
    cv::Mat* image, const ImagePreprocess& preprocess_conf,
    std::function<int32_t(void)> NextRandomInt) const {
  CHECK(preprocess_conf.has_center_crop());
  const ImageCenterCrop& conf = preprocess_conf.center_crop();
  const int32_t width = conf.width();
  const int32_t height = conf.height();
  // Validate before dividing: the previous version computed width / height
  // first and only then checked the ratio, so height == 0 divided by zero.
  CHECK_GT(width, 0);
  CHECK_GT(height, 0);
  const float crop_aspect_ratio = width * 1.0 / height;
  // Pick the largest rectangle with the target aspect ratio that fits inside
  // the source image: the tighter dimension limits the crop.
  int32_t middle_width = -1;
  int32_t middle_height = -1;
  if ((image->cols * 1.0 / image->rows) >= crop_aspect_ratio) {
    // Source is wider than the target ratio: full height, reduced width.
    middle_height = image->rows;
    middle_width = static_cast<int32_t>(middle_height * crop_aspect_ratio);
  } else {
    // Source is taller than the target ratio: full width, reduced height.
    middle_width = image->cols;
    middle_height = static_cast<int32_t>(middle_width / crop_aspect_ratio);
  }
  CHECK_GT(middle_width, 0);
  CHECK_GT(middle_height, 0);
  // Center the crop rectangle inside the source image.
  const int32_t x = (image->cols - middle_width) / 2;
  const int32_t y = (image->rows - middle_height) / 2;
  CHECK_GE(x, 0);
  CHECK_GE(y, 0);
  *image = (*image)(cv::Rect(x, y, middle_width, middle_height));
  // Scale the crop to the requested output size.
  cv::Mat dst;
  cv::resize(*image, dst, cv::Size(width, height), 0, 0, cv::INTER_LINEAR);
  *image = dst;
}
void ImagePreprocessImpl<PreprocessCase::kCropWithRandomSize>::DoPreprocess(
cv::Mat* image, const ImagePreprocess& preprocess_conf,
std::function<int32_t(void)> NextRandomInt) const {
......
......@@ -42,6 +42,14 @@ class ImagePreprocessImpl<PreprocessCase::kCrop> final : public ImagePreprocessI
std::function<int32_t(void)> NextRandomInt) const override;
};
// Deterministic center crop + resize preprocessing step (the random-int
// provider passed to DoPreprocess is unused by this specialization).
template<>
class ImagePreprocessImpl<PreprocessCase::kCenterCrop> final : public ImagePreprocessIf {
public:
private:
void DoPreprocess(cv::Mat* image, const ImagePreprocess& preprocess_conf,
std::function<int32_t(void)> NextRandomInt) const override;
};
template<>
class ImagePreprocessImpl<PreprocessCase::kCropWithRandomSize> final : public ImagePreprocessIf {
public:
......@@ -58,10 +66,11 @@ class ImagePreprocessImpl<PreprocessCase::kMirror> final : public ImagePreproces
std::function<int32_t(void)> NextRandomInt) const override;
};
#define PREPROCESS_CASE_SEQ \
OF_PP_MAKE_TUPLE_SEQ(PreprocessCase::kResize) \
OF_PP_MAKE_TUPLE_SEQ(PreprocessCase::kMirror) \
OF_PP_MAKE_TUPLE_SEQ(PreprocessCase::kCrop) \
#define PREPROCESS_CASE_SEQ \
OF_PP_MAKE_TUPLE_SEQ(PreprocessCase::kResize) \
OF_PP_MAKE_TUPLE_SEQ(PreprocessCase::kMirror) \
OF_PP_MAKE_TUPLE_SEQ(PreprocessCase::kCrop) \
OF_PP_MAKE_TUPLE_SEQ(PreprocessCase::kCenterCrop) \
OF_PP_MAKE_TUPLE_SEQ(PreprocessCase::kCropWithRandomSize)
ImagePreprocessIf* GetImagePreprocess(PreprocessCase);
......
......@@ -31,6 +31,7 @@ void Blob::Init(Regst* regst, const RtBlobDesc* blob_desc, char* header_ptr, cha
loss_instance_num_ptr_ = header_pod_ptr_.MutTensorPtr<float>(FieldKey::kLossInstanceNum, nullptr);
dptr_ = body_ptr;
dynamic_shape_ = blob_desc->shape();
record_num_ = -1;
}
const char* Blob::data_id(int32_t no) const {
......@@ -182,6 +183,8 @@ const Shape& Blob::dynamic_shape() const {
return dynamic_shape_;
}
const int32_t& Blob::record_num() const { return record_num_; }
void Blob::set_record_num(int32_t val) { record_num_ = val; }
int32_t Blob::col_id() const { return regst_->col_id(); }
void Blob::set_col_id(int32_t val) { regst_->set_col_id(val); }
int32_t Blob::max_col_id() const { return regst_->max_col_id(); }
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment