Commit b5e3e4c1 authored by Yinggang Wang, committed by GitHub

feat eager optimizer (#4680)


* feat(Optim): add sgd

* fix(*): add no_grad guard in optimizer

* fix(*): fix bug in optimizer test

* style(*): format codes

* style(Optimizer): refine codes

* style(Optimizer): refine codes

* fix(Optimizer): fix bug in ParamGroup

* fix(*): add __init__.py

Co-authored-by: oneflow-ci-bot <69100618+oneflow-ci-bot@users.noreply.github.com>
parent 5ce04915
"""
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from typing import List, Dict, Callable, Union, Any, Iterator
from types import GeneratorType
import numpy as np
import oneflow as flow
from oneflow.python.oneflow_export import oneflow_export
from oneflow.python.nn.parameter import Parameter
from oneflow.python.framework.tensor import Tensor
class ParamGroup(object):
def __init__(
self,
parameters: Union[Iterator[Parameter], Dict[str, Any]],
default_options: Dict,
):
        if isinstance(parameters, GeneratorType):
            self._parameters = list(parameters)
            self._options = dict(default_options)
        else:  # Dict
            assert "param" in parameters
            self._parameters = list(parameters["param"])
            # Copy the defaults so per-group overrides do not leak into the
            # shared default_options dict.
            self._options = dict(default_options)
            for key in self._options:
                if key in parameters:
                    self._options[key] = parameters[key]
@property
def options(self):
return self._options
@property
def parameters(self):
return self._parameters
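
# A minimal sketch of the two forms ParamGroup accepts (the names below are
# illustrative only, not part of this commit):
#
#   w = Parameter(flow.Tensor([1.0]))
#   group_a = ParamGroup((p for p in [w]), {"lr": 0.1, "scale": 1.0})
#   group_b = ParamGroup({"param": [w], "lr": 0.01}, {"lr": 0.1, "scale": 1.0})
#
# In the dict form, any key that also appears in the defaults ("lr" here)
# overrides the default for that group only.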
class Optimizer(object):
def __init__(self):
self._param_groups = list()
self._default_options = dict()
self._state = dict()
self._state["step"] = 0
self._op = None
def add_param_group(self, param_group) -> None:
# TODO(wyg)
raise NotImplementedError()
def load_state_dict(self, state_dict) -> None:
# TODO(wyg)
raise NotImplementedError()
def state_dict(self):
# TODO(wyg)
raise NotImplementedError()
def step(self, closure: Union[Callable, None] = None) -> Union[Tensor, None]:
raise NotImplementedError()
    def zero_grad(self, set_to_none: bool = False):
        for param_group in self._param_groups:
            for param in param_group.parameters:
                if param.grad is None:
                    continue
                if set_to_none:
                    param.grad = None
                else:
                    param.grad.fill_(0)
                    # param.grad.zeros_()
@oneflow_export("optim.SGD")
class SGD(Optimizer):
r"""
TODO
"""
def __init__(
self,
parameters: Union[Iterator[Parameter], List[Dict]],
lr: float,
momentum: float = 0.0,
scale: float = 1.0,
):
super().__init__()
assert lr >= 0.0, f"Invalid learning rate: {lr}"
assert momentum >= 0.0, f"Invalid momentum: {momentum}"
assert scale >= 0.0, f"Invalid scale factor: {scale}"
self._default_options = dict()
self._default_options["lr"] = lr
self._default_options["scale"] = scale
if momentum != 0.0:
self._default_options["momentum"] = momentum
# Add parameters
if isinstance(parameters, GeneratorType):
self._param_groups.append(ParamGroup(parameters, self._default_options))
else: # List[Dict]
for param in parameters:
self._param_groups.append(ParamGroup(param, self._default_options))
for param_group in self._param_groups:
for param in param_group.parameters:
assert param.is_leaf, "parameters must be leaf tensor"
self._state[param] = dict()
if "momentum" in self._default_options:
self._state[param]["momentum_buf"] = flow.tmp.zeros(
# TODO: zeros module support flow.Size parameter
tuple(param.shape)
)
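        # Build the update op once for all parameter groups: "momentum_update"
        # consumes an extra per-parameter momentum buffer, while "sgd_update"
        # is the plain update. Both read the learning rate from a tensor input
        # supplied at step() time.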
if "momentum" in self._default_options.keys():
self._op = (
flow.builtin_op("momentum_update")
.Input("model")
.Input("model_diff")
.Input("learning_rate")
.Input("momentum")
.Attr("scale", self._default_options["scale"])
.Attr("l1", 0.0)
.Attr("l2", 0.0)
.Attr("beta", self._default_options["momentum"])
.Attr("weight_decay", 0.0)
.Build()
)
else:
self._op = (
flow.builtin_op("sgd_update")
.Input("model")
.Input("model_diff")
.Input("learning_rate")
.Attr("scale", self._default_options["scale"])
.Attr("weight_decay", 0.0)
.Attr("l1", 0.0)
.Attr("l2", 0.0)
.Build()
)
def step(self, closure: Callable = None):
with flow.no_grad():
loss = None
if closure is not None:
loss = closure()
for param_group in self._param_groups:
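                # The builtin update ops take the learning rate as a tensor
                # input, so each group's lr is wrapped in a 1-element Tensor.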
lr_tensor = flow.Tensor([param_group.options["lr"]])
for param in param_group.parameters:
if param.grad is None:
continue
if "momentum" in self._default_options:
momentum_buf = self._state[param]["momentum_buf"]
self._op(param, param.grad, lr_tensor, momentum_buf)
else:
self._op(param, param.grad, lr_tensor)
self._state["step"] = self._state["step"] + 1
return loss
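
# A minimal usage sketch of the eager SGD above (requires eager execution;
# `x` and the gradient values are illustrative only):
#
#   x = Parameter(flow.Tensor(np.ones(3, dtype=np.float32)))
#   sgd = flow.optim.SGD([{"param": [x]}], lr=0.1, momentum=0.9)
#   loss = x * flow.Tensor(np.ones(3, dtype=np.float32))
#   loss.backward(flow.Tensor(np.ones(3, dtype=np.float32)))
#   sgd.step()       # in-place (momentum) update of x
#   sgd.zero_grad()  # reset x.grad for the next iteration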
"""
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import unittest
from collections import OrderedDict
import numpy as np
import oneflow as flow
from test_util import GenArgList
from oneflow.python.nn.parameter import Parameter
def compare_with_numpy_sgd(
test_case, x_shape, scale, momentum, learning_rate, train_iters,
):
# generate random number sequences
random_grad_seq = []
for _ in range(train_iters):
random_grad_seq.append(np.random.uniform(size=x_shape).astype(np.float32))
init_value = np.random.uniform(size=x_shape).astype(np.float32)
def train_by_oneflow():
x = Parameter(flow.Tensor(init_value))
param_list = list()
param_list.append(x)
sgd = flow.optim.SGD(
[{"param": param_list}], lr=learning_rate, momentum=momentum, scale=scale
)
        def train_one_iter(grad):
            grad_tensor = flow.Tensor(grad, requires_grad=False)
            loss = x * grad_tensor
            # BUG: loss = flow.sum(x * grad_tensor)
            # Backward with an all-ones upstream gradient, so x.grad == grad_tensor.
            ones = flow.Tensor(np.ones(list(loss.shape)))
            loss.backward(ones)
            sgd.step()
            sgd.zero_grad()
for i in range(train_iters):
train_one_iter(random_grad_seq[i])
return x
def train_by_numpy():
x = init_value
vt = np.zeros_like(x)
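        # Reference update mirroring the momentum op used above:
        #   v_t = momentum * v_{t-1} + lr * scale * grad
        #   x_t = x_{t-1} - v_t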
def train_one_iter(grad):
v = momentum * vt + learning_rate * scale * grad
param = x - v
return param, v
for i in range(train_iters):
x, vt = train_one_iter(random_grad_seq[i])
return x
oneflow_res = train_by_oneflow().numpy()
numpy_res = train_by_numpy()
test_case.assertTrue(
np.allclose(oneflow_res.flatten(), numpy_res.flatten(), rtol=1e-4, atol=1e-4)
)
@unittest.skipIf(
not flow.unittest.env.eager_execution_enabled(),
".numpy() doesn't work in lazy mode",
)
class TestOptimizers(flow.unittest.TestCase):
def test_sgd(test_case):
arg_dict = OrderedDict()
arg_dict["x_shape"] = [(10,)]
arg_dict["scale"] = [1.0, 0.9]
arg_dict["momentum"] = [0.0, 0.9]
arg_dict["learning_rate"] = [1]
arg_dict["train_iters"] = [10]
for arg in GenArgList(arg_dict):
compare_with_numpy_sgd(test_case, *arg)
if __name__ == "__main__":
unittest.main()