[ https://issues.apache.org/jira/browse/FLINK-1979?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15309044#comment-15309044
]
ASF GitHub Bot commented on FLINK-1979:

Github user chiwanpark commented on a diff in the pull request:
https://github.com/apache/flink/pull/1985#discussion_r65291552
--- Diff: flink-libraries/flink-ml/src/main/scala/org/apache/flink/ml/optimization/RegularizationPenalty.scala ---

@@ -0,0 +1,215 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.ml.optimization
+
+import org.apache.flink.ml.math.{Vector, BLAS}
+import org.apache.flink.ml.math.Breeze._
+import breeze.linalg.{norm => BreezeNorm}
+
+/** Represents a type of regularization penalty
+ *
+ * Regularization penalties are used to restrict the optimization problem to solutions with
+ * certain desirable characteristics, such as sparsity for the L1 penalty, or penalizing large
+ * weights for the L2 penalty.
+ *
+ * The regularization term, `R(w)`, is added to the objective function, `f(w) = L(w) + lambda*R(w)`,
+ * where `lambda` is the regularization parameter used to tune the amount of regularization applied.
+ */
+trait RegularizationPenalty extends Serializable {
+
+  /** Calculates the new weights based on the gradient and regularization penalty
+   *
+   * @param weightVector The weights to be updated
+   * @param gradient The gradient used to update the weights
+   * @param regularizationConstant The regularization parameter to be applied
+   * @param learningRate The effective step size for this iteration
+   * @return Updated weights
+   */
+  def takeStep(
+      weightVector: Vector,
+      gradient: Vector,
+      regularizationConstant: Double,
+      learningRate: Double)
+    : Vector
+
+  /** Adds regularization to the loss value
+   *
+   * @param oldLoss The loss to be updated
+   * @param weightVector The weights used to update the loss
+   * @param regularizationConstant The regularization parameter to be applied
+   * @return Updated loss
+   */
+  def regLoss(oldLoss: Double, weightVector: Vector, regularizationConstant: Double): Double
+
+}
+
+
+/** `L_2` regularization penalty.
+ *
+ * The regularization function is the square of the L2 norm `1/2*||w||_2^2`
+ * with `w` being the weight vector. The function penalizes large weights,
+ * favoring solutions with more small weights rather than few large ones.
+ */
+object L2Regularization extends RegularizationPenalty {
+
+  /** Calculates the new weights based on the gradient and L2 regularization penalty
+   *
+   * The updated weight is `w - learningRate * (gradient + lambda * w)` where
+   * `w` is the weight vector, and `lambda` is the regularization parameter.
+   *
+   * @param weightVector The weights to be updated
+   * @param gradient The gradient according to which we will update the weights
+   * @param regularizationConstant The regularization parameter to be applied
+   * @param learningRate The effective step size for this iteration
+   * @return Updated weights
+   */
+  override def takeStep(
+      weightVector: Vector,
+      gradient: Vector,
+      regularizationConstant: Double,
+      learningRate: Double)
+    : Vector = {
+    // add the gradient of the L2 regularization
+    BLAS.axpy(regularizationConstant, weightVector, gradient)
+
+    // update the weights according to the learning rate
+    BLAS.axpy(-learningRate, gradient, weightVector)
+
+    weightVector
+  }
+
+  /** Adds regularization to the loss value
+   *
+   * The updated loss is `oldLoss + lambda * 1/2*||w||_2^2` where
+   * `w` is the weight vector, and `lambda` is the regularization parameter
+   *
+   * @param oldLoss The loss to be updated
+   * @param weightVector The weights used to update the loss
+   * @param regularizationConstant The regularization parameter to be applied
+   * @return Updated loss
+   */
+  override def regLoss(oldLoss: Double, weightVector: Vector, regularizationConstant: Double)
+    : Double = {
+    val squareNorm = BLAS.dot(weightVector, weightVector)
+    oldLoss + regularizationConstant * 0.5 * squareNorm
+  }
+}
+
+/** `L_1` regularization penalty.
+ *
+ * The regularization function is the `L_1` norm `||w||_1` with `w` being the weight vector.
+ * The `L_1` penalty can be used to drive a number of the solution coefficients to 0, thereby
+ * producing sparse solutions.
+ *
+ */
+object L1Regularization extends RegularizationPenalty {
+
+  /** Calculates the new weights based on the gradient and regularization penalty
+   *
+   * The updated weight `w - learningRate * gradient` is shrunk towards zero
+   * by applying the proximal operator `signum(w) * max(0.0, abs(w) - shrinkageVal)`
+   * where `w` is the weight vector, `lambda` is the regularization parameter,
+   * and `shrinkageVal` is `lambda * learningRate`.
+   *
+   * @param weightVector The weights to be updated
+   * @param gradient The gradient according to which we will update the weights
+   * @param regularizationConstant The regularization parameter to be applied
+   * @param learningRate The effective step size for this iteration
+   * @return Updated weights
+   */
+  override def takeStep(
+      weightVector: Vector,
+      gradient: Vector,
+      regularizationConstant: Double,
+      learningRate: Double)
+    : Vector = {
+    // Update weight vector with gradient.
+    BLAS.axpy(-learningRate, gradient, weightVector)
+
+    // Apply proximal operator (soft thresholding)
+    val shrinkageVal = regularizationConstant * learningRate
+    var i = 0
+    while (i < weightVector.size) {
+      val wi = weightVector(i)
+      weightVector(i) = scala.math.signum(wi) *
+        scala.math.max(0.0, scala.math.abs(wi) - shrinkageVal)
--- End diff ---
We can change `scala.math` to `math`.
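
For reference, here is a minimal standalone sketch of the two update rules shown in the diff. It uses plain `Array[Double]` in place of Flink's `Vector`/`BLAS` so it runs on its own; `RegularizationSketch`, `l2Step`, and `l1Step` are made-up names for illustration, not the PR's code:

    // Illustrative plain-Scala versions of the two update rules from the diff.
    object RegularizationSketch {

      // L2 step: w <- w - learningRate * (gradient + lambda * w)
      def l2Step(w: Array[Double], grad: Array[Double],
                 lambda: Double, learningRate: Double): Array[Double] =
        w.zip(grad).map { case (wi, gi) => wi - learningRate * (gi + lambda * wi) }

      // L1 step: gradient step followed by soft thresholding,
      // signum(w) * max(0.0, abs(w) - lambda * learningRate)
      def l1Step(w: Array[Double], grad: Array[Double],
                 lambda: Double, learningRate: Double): Array[Double] = {
        val shrinkageVal = lambda * learningRate
        w.zip(grad).map { case (wi, gi) =>
          val updated = wi - learningRate * gi
          math.signum(updated) * math.max(0.0, math.abs(updated) - shrinkageVal)
        }
      }

      def main(args: Array[String]): Unit = {
        val w = Array(0.5, -0.03, 2.0)
        val g = Array(0.1, 0.0, -0.4)
        println(l2Step(w, g, lambda = 0.1, learningRate = 0.5).mkString(", "))
        // The small second weight is driven exactly to zero by soft thresholding,
        // which is how the L1 penalty produces sparse solutions.
        println(l1Step(w, g, lambda = 0.1, learningRate = 0.5).mkString(", "))
      }
    }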
> Implement Loss Functions
> ------------------------
>
> Key: FLINK-1979
> URL: https://issues.apache.org/jira/browse/FLINK-1979
> Project: Flink
> Issue Type: Improvement
> Components: Machine Learning Library
> Reporter: Johannes Günther
> Assignee: Johannes Günther
> Priority: Minor
> Labels: ML
>
> For convex optimization problems, optimizer methods like SGD rely on a pluggable implementation of a loss function and its first derivative.
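
As an aside, a minimal sketch of what such a pluggable interface could look like; `PluggableLoss` and `SquaredLossSketch` are hypothetical names for illustration, not the actual FlinkML API this issue tracks:

    // Hypothetical shape of a pluggable loss function with its first derivative.
    trait PluggableLoss extends Serializable {
      /** Loss value for a single (prediction, label) pair. */
      def loss(prediction: Double, label: Double): Double

      /** First derivative of the loss with respect to the prediction. */
      def derivative(prediction: Double, label: Double): Double
    }

    /** Squared loss: L = 1/2 * (prediction - label)^2, L' = prediction - label. */
    object SquaredLossSketch extends PluggableLoss {
      override def loss(prediction: Double, label: Double): Double = {
        val diff = prediction - label
        0.5 * diff * diff
      }
      override def derivative(prediction: Double, label: Double): Double =
        prediction - label
    }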

--
This message was sent by Atlassian JIRA
(v6.3.4#6332)
