Unverified Commit 47aba214 authored by Shenghang Tsai, committed by GitHub

Dev pure cpu (#3398)


* cmake dont panic when build cuda

* naive changes

* fix cudaMemcpyKind

* fix acc actor

* fix actor

* fix gdb

* fix vm

* fix work type

* fix cuda type

* fix cuda type

* fix collective backend

* fix global scope

* amp

* rm PackKernelUtil gpu

* fix log

* fix rand

* fix sync size

* fix allocator

* fix vm

* fix kernel

* fix kernel

* fix kernels

* fix kernel

* fix softmax

* fix kernels

* fix reshape kernels

* add workaround

* try fix symbol not found

* fix vm

* fix vm

* fix jpeg

* fix broadcast gpu

* fix broadcast like

* fix transpose

* fix matmul

* fix CopyElemOnGpu

* fix sigmoid

* fix sigmoid and softmax

* fix relu

* fix sparse cross entropy

* fix kernels

* fix tanh

* fix same padding

* fix softmax

* fix undefined symbol: gzgets

* fix CopyField

* fix scalar add

* fix CopyNDGpuImpl

* copier

* fix slice boxing

* fix mem copier

* fix zero like

* fix acc actor

* fix dev pure cpu (#3410)

* add pure cpu message

* add default for src dir in ci

* add cpu workflow

* fix extra_oneflow_cmake_args

* fix link problem add update readme

* fix _GetDefaultConfigProto

* use gpu

* rm default value for gpu dev num

* rm cpu ci

* check in skip code

* refine ci and add back

* add arg for appendix

* fix env arg

* fix env arg

* rm make nccl

* add back to pass gpu ci

* get with cuda in py

* fix fmt

* fix test kernels

* fix activation

* add skips

* fmt

* skip all reduce

* fix assign

* fix bn

* fix bn

* rm tmp_wheel

* rm tmp_wheel

* move clean up

* for CPU-only OneFlow make gpu_device_num equivalent to cpu_device_num

* change warning to info

* only run 3 iters for cpu

* skip bert for cpu

* fix check

* add warning

* add cpu Integration test

* Dev pure cpu test cases (#3422)

* fix test cpu cases

* change os.getenv('ONEFLOW_TEST_CPU_ONLY') == 'True' to os.getenv('ONEFLOW_TEST_CPU_ONLY')

* print traceback for info and warning

* fix test cpu cases

* add more info on why skip check of resnet

* skip gan in cpu

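A note on the `os.getenv('ONEFLOW_TEST_CPU_ONLY')` bullet above: the CPU CI job below sets the variable to `1`, so comparing the raw value against the string `'True'` never matches and the CPU-only skips never fire, while testing the returned value's truthiness works for any non-empty setting. A minimal illustration (not the PR's test code):

```python
import os

# The workflow exports ONEFLOW_TEST_CPU_ONLY=1, not "True".
os.environ["ONEFLOW_TEST_CPU_ONLY"] = "1"

print(os.getenv("ONEFLOW_TEST_CPU_ONLY") == "True")  # False -> skip never triggered
print(bool(os.getenv("ONEFLOW_TEST_CPU_ONLY")))      # True  -> skip triggers as intended
```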
Co-authored-by: tsai <caishenghang@1f-dev.kbaeegfb1x0ubnoznzequyxzve.bx.internal.cloudapp.net>
Co-authored-by: tsai <caishenghang@oneflow.org>
Co-authored-by: oneflow-bot <69100618+oneflow-bot@users.noreply.github.com>
Co-authored-by: JackieWu <wkcn@live.cn>
Co-authored-by: OuYang Yu <xuanjiuye@gmail.com>
parent e5e3eb31
Showing 183 additions and 48 deletions
......@@ -20,6 +20,10 @@ jobs:
runs-on: [self-hosted, linux, gpu]
if: github.event.pull_request.draft == false
steps:
- name: Clean environment
run: |
rm -rf build/third_party
bash ci/build/clean.sh
- uses: actions/checkout@v2
- name: Check license (please run 'make of_format' if failed)
run: |
......@@ -33,8 +37,6 @@ jobs:
- name: Setup environment
run: |
echo $HOSTNAME
rm -rf build/third_party
bash ci/build/clean.sh
bash ci/setup_submodule.sh
- name: Checkout submodules
shell: bash
......@@ -43,6 +45,7 @@ jobs:
git -c "http.extraheader=$auth_header" -c protocol.version=2 submodule update --init --recursive
- name: Build OneFlow
run: |
ONEFLOW_CI_PACKAGE_APPENDIX="_cu102" \
bash ci/build/make.sh
- name: Build docker image for testing
run: |
......@@ -107,3 +110,54 @@ jobs:
if: ${{ always() }}
run: |
bash ci/build/clean.sh
build_and_test_cpu:
runs-on: [self-hosted, linux, gpu]
if: github.event.pull_request.draft == false
steps:
- name: Clean environment
run: |
rm -rf build/third_party
bash ci/build/clean.sh
- uses: actions/checkout@v2
- name: Setup environment
run: |
echo $HOSTNAME
bash ci/setup_submodule.sh
- name: Checkout submodules
shell: bash
run: |
auth_header="$(git config --local --get http.https://github.com/.extraheader)"
git -c "http.extraheader=$auth_header" -c protocol.version=2 submodule update --init --recursive
- name: Build OneFlow
run: |
export ONEFLOW_CI_EXTRA_ONEFLOW_CMAKE_ARGS="-DBUILD_CUDA=OFF"
export ONEFLOW_CI_TMP_DIR=$HOME/ci-tmp-cpu
bash ci/build/make.sh
- name: Build docker image for testing
run: |
bash docker/ci/test/build.sh
- name: Unit test
run: |
docker run --shm-size=8g --rm \
-v $HOME/ci-tmp-cpu:/ci-tmp \
-w $PWD -v $PWD:$PWD -v /dataset:/dataset -v /model_zoo:/model_zoo \
--env ONEFLOW_WHEEL_PATH=/ci-tmp/wheelhouse \
--env ONEFLOW_TEST_CPU_ONLY=1 \
oneflow-test \
bash -c "bash ci/test/try_install.sh && bash ci/test/1node_op_test.sh"
- name: Integration test
run: |
docker run --shm-size=8g --rm \
-v $HOME/ci-tmp-cpu:/ci-tmp \
-w $PWD -v $PWD:$PWD -v /dataset:/dataset -v /model_zoo:/model_zoo \
--env ONEFLOW_WHEEL_PATH=/ci-tmp/wheelhouse \
--env ONEFLOW_TEST_CPU_ONLY=1 \
oneflow-test \
bash -c "bash ci/test/try_install.sh && bash ci/test/1node_model_test.sh"
- name: Clean up files created by root
if: ${{ always() }}
run: |
ONEFLOW_CI_TMP_DIR=$HOME/ci-tmp-cpu \
bash ci/build/clean.sh
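For reference, the same switch can be exercised outside the Docker wrapper; a minimal sketch, assuming a OneFlow wheel is already installed in the current environment:

```bash
# Run the single-node op tests in CPU-only mode, mirroring the CI job above.
ONEFLOW_TEST_CPU_ONLY=1 bash ci/test/1node_op_test.sh
```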
......@@ -122,6 +122,8 @@
make pip_install
```
- For pure CPU build, please add this CMake flag `-DBUILD_CUDA=OFF`.
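A minimal sketch of a CPU-only configure-and-install sequence, assuming an out-of-source `build` directory and that any other flags from the main build instructions are added as needed:

```bash
mkdir -p build && cd build
cmake .. -DBUILD_CUDA=OFF    # pure CPU build: skip all CUDA components
make -j$(nproc)
make pip_install             # install the resulting Python package
```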
### Troubleshooting
Please refer to [troubleshooting](docs/source/troubleshooting.md) for common issues you might encounter when compiling and running OneFlow.
......
set -ex
tmp_dir=${ONEFLOW_CI_TMP_DIR:-"$HOME/ci-tmp"}
docker run --rm \
-v $HOME/ci-tmp:/ci-tmp \
-w $HOME/ci-tmp:/ci-tmp busybox rm -rf /ci-tmp/wheelhouse
-v $tmp_dir:/ci-tmp \
-w $tmp_dir:/ci-tmp busybox rm -rf /ci-tmp/wheelhouse
docker run --rm -v $PWD:/p -w /p busybox rm -rf tmp_wheel
docker run --rm -v $PWD:/p -w /p busybox rm -rf build
......@@ -2,6 +2,8 @@ set -ex
src_dir=${ONEFLOW_SRC_DIR:-"$PWD"}
tmp_dir=${ONEFLOW_CI_TMP_DIR:-"$HOME/ci-tmp"}
extra_oneflow_cmake_args=${ONEFLOW_CI_EXTRA_ONEFLOW_CMAKE_ARGS:-""}
package_appendix=${ONEFLOW_CI_PACKAGE_APPENDIX:-""}
mkdir -p $tmp_dir
docker_tag=${ONEFLOW_CI_DOCKER_TAG:-"oneflow:ci-manylinux2014-cuda10.2"}
......@@ -35,7 +37,8 @@ function build() {
"$docker_tag" \
/oneflow-src/docker/package/manylinux/build_wheel.sh \
--python3.6 \
--package-name oneflow_cu102
--package-name oneflow${package_appendix} \
$extra_oneflow_cmake_args
}
set +e
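Taken together, these hooks let one script drive both CI jobs; the two invocations look roughly like this (values copied from the workflow above):

```bash
# CPU-only build, as in the build_and_test_cpu job:
ONEFLOW_CI_EXTRA_ONEFLOW_CMAKE_ARGS="-DBUILD_CUDA=OFF" \
ONEFLOW_CI_TMP_DIR=$HOME/ci-tmp-cpu \
bash ci/build/make.sh

# CUDA build, keeping the cu102 wheel name via the package appendix:
ONEFLOW_CI_PACKAGE_APPENDIX="_cu102" bash ci/build/make.sh
```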
......
set -x
set -e
python3 ci/setup_submodule.py --oneflow_src_local_path=${ONEFLOW_CI_SRC_DIR}
src_dir=${ONEFLOW_CI_SRC_DIR:-"$HOME/oneflow"}
python3 ci/setup_submodule.py --oneflow_src_local_path=$src_dir
git submodule sync
git submodule update --init --recursive
# main cpp
list(APPEND of_main_cc ${PROJECT_SOURCE_DIR}/oneflow/core/job/oneflow_worker.cpp)
# TODO(tsai): skip for now, fail to link when building CPU only
if (BUILD_CUDA)
list(APPEND of_main_cc ${PROJECT_SOURCE_DIR}/oneflow/core/job/oneflow_worker.cpp)
endif()
function(oneflow_add_executable)
if (BUILD_CUDA)
cuda_add_executable(${ARGV})
......@@ -291,6 +293,14 @@ add_custom_target(of_pyscript_copy ALL
COMMAND ${Python_EXECUTABLE} "${PROJECT_SOURCE_DIR}/tools/generate_oneflow_symbols_export_file.py"
"${PROJECT_SOURCE_DIR}" "${of_pyscript_dir}/oneflow/python/__export_symbols__.py")
file(GLOB_RECURSE oneflow_all_python_file "${PROJECT_SOURCE_DIR}/oneflow/python/*.py")
if (BUILD_CUDA)
add_custom_command(TARGET of_pyscript_copy POST_BUILD
COMMAND echo "with_cuda=True" >> "${of_pyscript_dir}/oneflow/python/compatibility.py")
else()
add_custom_command(TARGET of_pyscript_copy POST_BUILD
COMMAND echo "with_cuda=False" >> "${of_pyscript_dir}/oneflow/python/compatibility.py")
endif()
copy_files("${oneflow_all_python_file}" "${PROJECT_SOURCE_DIR}" "${of_pyscript_dir}" of_pyscript_copy)
file(WRITE ${of_pyscript_dir}/oneflow/python/framework/sysconfig_gen.py "generated_compile_flags = []\n")
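The generated `compatibility.py` can then be consulted from Python ("get with cuda in py" in the commit log); a minimal sketch of a consumer, assuming the module is importable at that path (the PR's actual accessor may differ):

```python
# Hypothetical consumer of the flag written by the CMake rule above.
from oneflow.python.compatibility import with_cuda

if not with_cuda:
    print("CPU-only OneFlow build detected; CUDA tests will be skipped")
```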
......@@ -334,28 +344,29 @@ endforeach()
# build test
if(BUILD_TESTING)
if(NOT BUILD_CUDA)
message(FATAL_ERROR "BUILD_TESTING without BUILD_CUDA")
endif()
if (of_all_test_cc)
oneflow_add_executable(oneflow_testexe ${of_all_test_cc})
target_link_libraries(oneflow_testexe ${of_libs} ${oneflow_third_party_libs})
set_target_properties(oneflow_testexe PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/bin")
add_test(NAME oneflow_test COMMAND oneflow_testexe)
# foreach(cc ${of_all_test_cc})
# get_filename_component(test_name ${cc} NAME_WE)
# string(CONCAT test_exe_name ${test_name} exe)
# oneflow_add_executable(${test_exe_name} ${cc})
# target_link_libraries(${test_exe_name} ${of_libs} ${oneflow_third_party_libs})
# endforeach()
endif()
if (of_separate_test_cc)
foreach(cc ${of_separate_test_cc})
get_filename_component(test_name ${cc} NAME_WE)
string(CONCAT test_exe_name ${test_name} exe)
oneflow_add_executable(${test_exe_name} ${cc})
target_link_libraries(${test_exe_name} ${of_libs} ${oneflow_third_party_libs})
endforeach()
if(BUILD_CUDA)
if (of_all_test_cc)
oneflow_add_executable(oneflow_testexe ${of_all_test_cc})
target_link_libraries(oneflow_testexe ${of_libs} ${oneflow_third_party_libs})
set_target_properties(oneflow_testexe PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/bin")
add_test(NAME oneflow_test COMMAND oneflow_testexe)
# foreach(cc ${of_all_test_cc})
# get_filename_component(test_name ${cc} NAME_WE)
# string(CONCAT test_exe_name ${test_name} exe)
# oneflow_add_executable(${test_exe_name} ${cc})
# target_link_libraries(${test_exe_name} ${of_libs} ${oneflow_third_party_libs})
# endforeach()
endif()
if (of_separate_test_cc)
foreach(cc ${of_separate_test_cc})
get_filename_component(test_name ${cc} NAME_WE)
string(CONCAT test_exe_name ${test_name} exe)
oneflow_add_executable(${test_exe_name} ${cc})
target_link_libraries(${test_exe_name} ${of_libs} ${oneflow_third_party_libs})
endforeach()
endif()
else()
message(ERROR "BUILD_TESTING=ON has no effect when BUILD_CUDA=OFF")
endif()
endif()
......
......@@ -103,12 +103,12 @@ set(oneflow_third_party_libs
${GOOGLEMOCK_STATIC_LIBRARIES}
${PROTOBUF_STATIC_LIBRARIES}
${GRPC_STATIC_LIBRARIES}
${ZLIB_STATIC_LIBRARIES}
${farmhash_STATIC_LIBRARIES}
${BLAS_LIBRARIES}
${LIBJPEG_STATIC_LIBRARIES}
${OPENCV_STATIC_LIBRARIES}
${COCOAPI_STATIC_LIBRARIES}
${LIBJPEG_STATIC_LIBRARIES}
${ZLIB_STATIC_LIBRARIES}
)
if (NOT WITH_XLA)
......
......@@ -58,8 +58,8 @@ if [[ $SKIP_THIRD_PARTY != 1 ]]; then
cmake -DTHIRD_PARTY=ON \
$COMMON_CMAKE_ARGS \
-DONEFLOW=OFF \
$EXTRA_ONEFLOW_CMAKE_ARGS \
$ONEFLOW_SRC_DIR
make -j nccl
make -j`nproc` prepare_oneflow_third_party
popd
......@@ -86,7 +86,7 @@ do
cmake -DTHIRD_PARTY=OFF -DONEFLOW=ON\
$COMMON_CMAKE_ARGS \
-DPython3_ROOT_DIR=$PY_ROOT \
$EXTRA_ONEFLOW_CMAKE_ARGS \
$EXTRA_ONEFLOW_CMAKE_ARGS \
$ONEFLOW_SRC_DIR
cmake --build . -j `nproc`
popd
......
......@@ -21,12 +21,7 @@ void AccumulateCompActor::Init(const TaskProto& task_proto, int32_t max_acc_cnt,
using namespace std::placeholders;
order_ = order;
if (GetDeviceType() == DeviceType::kCPU) {
cpy_func_ = std::bind(Memcpy<DeviceType::kCPU>, _1, _2, _3, _4
#ifdef WITH_CUDA
,
cudaMemcpyHostToHost
#endif
);
cpy_func_ = std::bind(Memcpy<DeviceType::kCPU>, _1, _2, _3, _4, cudaMemcpyHostToHost);
} else {
#ifdef WITH_CUDA
cpy_func_ = std::bind(Memcpy<DeviceType::kGPU>, _1, _2, _3, _4, cudaMemcpyDeviceToDevice);
......@@ -54,8 +49,12 @@ void AccumulateCompActor::Act() {
Memset<DeviceType::kCPU>(kernel_ctx.device_ctx, out_blob->mut_dptr(), 0,
out_blob->ByteSizeOfBlobBody());
} else if (GetDeviceType() == DeviceType::kGPU) {
#ifdef WITH_CUDA
Memset<DeviceType::kGPU>(kernel_ctx.device_ctx, out_blob->mut_dptr(), 0,
out_blob->ByteSizeOfBlobBody());
#else
UNIMPLEMENTED();
#endif
} else {
UNIMPLEMENTED();
}
......
......@@ -236,6 +236,7 @@ void Actor::InitDeviceCtx(const ThreadCtx& thread_ctx) {
device_ctx_.reset(new CpuDeviceCtx());
break;
}
#ifdef WITH_CUDA
case DeviceType::kGPU: {
CudaStreamHandle* cuda_handle = nullptr;
CHECK_EQ(GetLocalWorkStreamId(), 0);
......@@ -243,6 +244,7 @@ void Actor::InitDeviceCtx(const ThreadCtx& thread_ctx) {
device_ctx_.reset(new CudaDeviceCtx(cuda_handle));
break;
}
#endif
default: { UNIMPLEMENTED(); }
}
}
......
......@@ -18,7 +18,9 @@ limitations under the License.
#include <type_traits>
#include <utility>
#ifdef WITH_CUDA
#include <cuda_fp16.h>
#endif // WITH_CUDA
#include "oneflow/core/common/cblas.h"
#include "oneflow/core/common/preprocessor.h"
......
......@@ -32,14 +32,22 @@ namespace {
static char* MallocThenCpyD2H(const char* gpu_src, size_t size) {
char* cpu_dst = reinterpret_cast<char*>(malloc(size));
#ifdef WITH_CUDA
cudaMemcpy(cpu_dst, gpu_src, size, cudaMemcpyDeviceToHost);
#else
UNIMPLEMENTED();
#endif
return cpu_dst;
}
static void CpyH2DThenFree(char* gpu_dst, char* cpu_src, size_t size) {
#ifdef WITH_CUDA
cudaMemcpy(gpu_dst, cpu_src, size, cudaMemcpyHostToDevice);
#else
UNIMPLEMENTED();
#endif
free(cpu_src);
}
} // namespace
template<typename T>
void LoadFromStrFile(T* buf, const std::string& file_name) {
......
......@@ -123,6 +123,16 @@ class CudaCurrentDeviceGuard final {
} // namespace oneflow
#else
namespace oneflow {
enum class CudaWorkType {};
inline size_t GetCudaWorkTypeSize() { return 0; }
} // namespace oneflow
#endif // WITH_CUDA
#endif // ONEFLOW_CORE_DEVICE_CUDA_UTIL_H_
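The CPU-only stub above exists so that call sites need no `#ifdef`; a hypothetical caller, only to illustrate the pattern (not code from this PR):

```cpp
#include "oneflow/core/device/cuda_util.h"

// With BUILD_CUDA=OFF the stub returns 0, so this compiles and behaves
// sensibly in both build modes without an #ifdef at the call site.
size_t TotalDeviceWorkStreams(size_t cpu_streams) {
  return cpu_streams + oneflow::GetCudaWorkTypeSize();
}
```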
......@@ -251,13 +251,11 @@ void CudaAsyncMemoryCopier::CopyND(DeviceCtx* ctx, void* dst, const void* src,
UNIMPLEMENTED();
}
}
#endif
REGISTER_DEFAULT_MEMORY_COPIER(DeviceType::kCPU, []() { return new HostMemoryCopier(); });
#ifdef WITH_CUDA
REGISTER_DEFAULT_MEMORY_COPIER(DeviceType::kGPU, []() { return new CudaAsyncMemoryCopier(); });
#endif
MemoryCopier* NewDefaultMemoryCopier(DeviceType device_type) {
......@@ -266,8 +264,6 @@ MemoryCopier* NewDefaultMemoryCopier(DeviceType device_type) {
->Create();
}
#endif
#define SPECIALIZE_COPY_ELEM(dtype) \
template void MemoryCopier::CopyElem<dtype>(DeviceCtx * ctx, void* dst, const void* src, \
const MemoryCopyNdDesc& desc) const;
......
......@@ -35,8 +35,10 @@ struct MemoryCopyNdDesc {
template<int32_t NDIMS>
void CopyNDCpuImpl(DeviceCtx* ctx, void* dst, const void* src, const MemoryCopyNdDesc& desc);
#ifdef WITH_CUDA
template<int32_t NDIMS>
void CopyNDGpuImpl(DeviceCtx* ctx, void* dst, const void* src, const MemoryCopyNdDesc& desc);
#endif
class MemoryCopier {
public:
......
......@@ -35,6 +35,7 @@ FLAT_MSG_VIEW_END(PinBlobInstruction);
} // namespace
#ifdef WITH_CUDA
class CudaHostRegisterBlobInstructionType final : public vm::InstructionType {
public:
CudaHostRegisterBlobInstructionType() = default;
......@@ -84,6 +85,7 @@ class CudaHostUnregisterBlobInstructionType final : public vm::InstructionType {
};
COMMAND(
vm::RegisterInstructionType<CudaHostUnregisterBlobInstructionType>("CudaHostUnregisterBlob"));
#endif
} // namespace eager
} // namespace oneflow
......@@ -13,6 +13,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifdef WITH_CUDA
#include "oneflow/core/common/util.h"
#include "oneflow/core/job/job_desc.h"
#include "oneflow/core/eager/opkernel_object.h"
......@@ -143,3 +145,5 @@ COMMAND(vm::RegisterInstructionType<GpuFeedBlobInstructionType>("gpu.FeedBlob"))
} // namespace eager
} // namespace oneflow
#endif
......@@ -83,6 +83,7 @@ Maybe<void> SliceBoxingSubTskGphBuilder::Build(
return Error::BoxingNotSupported();
}
const auto GetBoxingGpuThrdId = [](const int64_t dev_id, CudaWorkType work_type) -> int64_t {
#ifdef WITH_CUDA
if (work_type == CudaWorkType::kCopyH2D) {
return Global<IDMgr>::Get()->GetGpuH2DThrdId(dev_id);
} else if (work_type == CudaWorkType::kCopyD2H) {
......@@ -90,7 +91,11 @@ Maybe<void> SliceBoxingSubTskGphBuilder::Build(
} else {
return Global<IDMgr>::Get()->GetGpuMixThrdId(dev_id);
}
#else
UNIMPLEMENTED();
#endif
};
const auto NewEdge = [&ctx]() -> TaskEdge* { return ctx->task_graph()->NewEdge(); };
const auto CreateBoxingNode121 = [&ctx, &lbi, &GetBoxingGpuThrdId](
const ParallelDesc& pd, const int64_t parallel_id,
......@@ -102,7 +107,11 @@ Maybe<void> SliceBoxingSubTskGphBuilder::Build(
if (pd.device_type() == DeviceType::kCPU) {
thrd_id = Global<IDMgr>::Get()->PickCpuThrdIdEvenly(machine_id);
} else if (pd.device_type() == DeviceType::kGPU) {
#ifdef WITH_CUDA
thrd_id = GetBoxingGpuThrdId(pd.DeviceIdForParallelId(parallel_id), CudaWorkType::kCopyH2D);
#else
UNIMPLEMENTED();
#endif
} else {
UNIMPLEMENTED();
}
......@@ -118,7 +127,11 @@ Maybe<void> SliceBoxingSubTskGphBuilder::Build(
if (src_node->device_type() == DeviceType::kCPU) {
thrd_id = Global<IDMgr>::Get()->PickCpuThrdIdEvenly(src_node->machine_id());
} else if (src_node->device_type() == DeviceType::kGPU) {
#ifdef WITH_CUDA
thrd_id = GetBoxingGpuThrdId(src_node->GpuPhyId(), CudaWorkType::kCopyD2H);
#else
UNIMPLEMENTED();
#endif
} else {
UNIMPLEMENTED();
}
......@@ -235,9 +248,13 @@ Maybe<void> SliceBoxingSubTskGphBuilder::Build(
if (in_pd.device_type() == DeviceType::kCPU) {
local_concat_thrd_id = Global<IDMgr>::Get()->PickCpuThrdIdEvenly(in_machine_id);
} else if (in_pd.device_type() == DeviceType::kGPU) {
#ifdef WITH_CUDA
local_concat_thrd_id = GetBoxingGpuThrdId(
in_nodes.at(in_parallel_ids.at(out_id % in_parallel_ids.size()))->GpuPhyId(),
CudaWorkType::kCopyD2H);
#else
UNIMPLEMENTED();
#endif
}
local_concat_node->Init(lbi, concat_slice, kSliceBoxingTaskModeCopy, in_machine_id,
local_concat_thrd_id, Global<IDMgr>::Get()->CpuMemZoneId());
......@@ -293,9 +310,13 @@ Maybe<void> SliceBoxingSubTskGphBuilder::Build(
if (in_pd.device_type() == DeviceType::kCPU) {
local_add_thrd_id = Global<IDMgr>::Get()->PickCpuThrdIdEvenly(in_machine_id);
} else if (in_pd.device_type() == DeviceType::kGPU) {
#ifdef WITH_CUDA
local_add_thrd_id = GetBoxingGpuThrdId(
in_nodes.at(in_parallel_ids.at(out_id % in_parallel_ids.size()))->GpuPhyId(),
CudaWorkType::kCopyD2H);
#else
UNIMPLEMENTED();
#endif
}
local_add_node->Init(lbi, out_slice, kSliceBoxingTaskModeAdd, in_machine_id,
local_add_thrd_id, Global<IDMgr>::Get()->CpuMemZoneId());
......@@ -337,8 +358,12 @@ Maybe<void> SliceBoxingSubTskGphBuilder::Build(
if (in_pd.device_type() == DeviceType::kCPU) {
local_add_thrd_id = Global<IDMgr>::Get()->PickCpuThrdIdEvenly(in_machine_id);
} else if (in_pd.device_type() == DeviceType::kGPU) {
#ifdef WITH_CUDA
local_add_thrd_id = GetBoxingGpuThrdId(in_nodes.at(in_ids_on_machine.front())->GpuPhyId(),
CudaWorkType::kCopyH2D);
#else
UNIMPLEMENTED();
#endif
}
local_add_node->Init(lbi, slice, kSliceBoxingTaskModeAdd, in_machine_id, local_add_thrd_id);
FOR_RANGE(int64_t, i, 0, in_ids_on_machine.size()) {
......
......@@ -30,7 +30,13 @@ class CaseCompTaskNode final : public CompTaskNode {
void ConsumeAllRegsts() override;
TaskType GetTaskType() const override { return TaskType::kCase; }
CudaWorkType GetCudaWorkType() const override { return CudaWorkType::kCompute; }
CudaWorkType GetCudaWorkType() const override {
#ifdef WITH_CUDA
return CudaWorkType::kCompute;
#else
UNIMPLEMENTED();
#endif
}
private:
void BuildExecGphAndRegst() override;
......
......@@ -29,7 +29,13 @@ class CompTaskNode : public TaskNode {
CompTaskNode() = default;
virtual ~CompTaskNode() = default;
virtual CudaWorkType GetCudaWorkType() const { return CudaWorkType::kCompute; }
virtual CudaWorkType GetCudaWorkType() const {
#ifdef WITH_CUDA
return CudaWorkType::kCompute;
#else
UNIMPLEMENTED();
#endif
}
virtual void ToProto(TaskProto*) override;
// parallel_ctx_
......