@@ -55,14 +55,6 @@ public class RMSProp<Model: Differentiable>: Optimizer
    }

    public func update(_ model: inout Model, along direction: Model.TangentVector) {
-       update(&model.allDifferentiableVariables, along: direction)
-   }
-
-   // TODO: Deprecate this when `Differentiable.AllDifferentiableVariables` is removed.
-   public func update(
-       _ model: inout Model.AllDifferentiableVariables,
-       along direction: Model.TangentVector
-   ) {
        step += 1
        let learningRate = self.learningRate * 1 / (1 + decay * Float(step))
        alpha = alpha * rho + direction .* direction * (1 - rho)
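With the `AllDifferentiableVariables` overload gone, callers go through the single `update(_:along:)` entry point directly. A minimal usage sketch under assumed details (the `Dense` layer, loss, and data here are hypothetical; assumes the Swift for TensorFlow `TensorFlow` module):

```swift
import TensorFlow

// Hypothetical one-layer model, used only to illustrate the call site.
var model = Dense<Float>(inputSize: 4, outputSize: 1)
let optimizer = RMSProp(for: model, learningRate: 1e-3)

let x = Tensor<Float>(randomNormal: [8, 4])
let y = Tensor<Float>(zeros: [8, 1])

// Differentiate a loss with respect to the model and take one optimizer step.
let grad = gradient(at: model) { model -> Tensor<Float> in
    meanSquaredError(predicted: model(x), expected: y)
}
optimizer.update(&model, along: grad)  // formerly update(&model.allDifferentiableVariables, along: grad)
```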
@@ -107,14 +99,6 @@ public class AdaGrad<Model: Differentiable>: Optimizer
    }

    public func update(_ model: inout Model, along direction: Model.TangentVector) {
-       update(&model.allDifferentiableVariables, along: direction)
-   }
-
-   // TODO: Deprecate this when `Differentiable.AllDifferentiableVariables` is removed.
-   public func update(
-       _ model: inout Model.AllDifferentiableVariables,
-       along direction: Model.TangentVector
-   ) {
        alpha = rho + direction .* direction
        let denominator = Model.TangentVector.sqrt(alpha) + epsilon
        model.move(along: -learningRate * direction ./ denominator)
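For reference, the three context lines above can be traced with plain scalars. This is only an illustrative trace of the per-parameter arithmetic (tensor operations replaced by `Float` arithmetic, values chosen arbitrarily), not library code:

```swift
// Scalar trace of the AdaGrad-style update shown in the context lines.
let rho: Float = 0.9, learningRate: Float = 1e-3, epsilon: Float = 1e-8
let g: Float = 0.5                               // one component of `direction`
var alpha: Float = 0
alpha = rho + g * g                              // alpha = rho + direction .* direction
let denominator = alpha.squareRoot() + epsilon   // sqrt(alpha) + epsilon
let delta = -learningRate * g / denominator      // offset applied by model.move(along:)
```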
@@ -166,14 +150,6 @@ public class AdaDelta<Model: Differentiable>: Optimizer
    }

    public func update(_ model: inout Model, along direction: Model.TangentVector) {
-       update(&model.allDifferentiableVariables, along: direction)
-   }
-
-   // TODO: Deprecate this when `Differentiable.AllDifferentiableVariables` is removed.
-   public func update(
-       _ model: inout Model.AllDifferentiableVariables,
-       along direction: Model.TangentVector
-   ) {
        step += 1
        let learningRate = self.learningRate / (1 + decay * Float(step))
        averageSquared = rho * averageSquared + (1 - rho) * direction .* direction
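The decayed learning rate in the first context line is a simple time-based schedule. A small sketch of how it evolves (illustrative values only):

```swift
// Time-based decay of the learning rate, as used in the context line above.
let baseLearningRate: Float = 1.0
let decay: Float = 0.1
for step in 1...5 {
    let learningRate = baseLearningRate / (1 + decay * Float(step))
    print(step, learningRate)  // ~0.909, 0.833, 0.769, 0.714, 0.667
}
```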
@@ -230,15 +206,7 @@ public class Adam<Model: Differentiable>: Optimizer
    }

    public func update(_ model: inout Model, along direction: Model.TangentVector) {
-       update(&model.allDifferentiableVariables, along: direction)
-   }
-
-   // TODO: Deprecate this when `Differentiable.AllDifferentiableVariables` is removed.
-   public func update(
-       _ model: inout Model.AllDifferentiableVariables,
-       along direction: Model.TangentVector
-   ) {
-       self.step += 1
+       step += 1
        let step = Float(self.step)
        let learningRate = self.learningRate * 1 / (1 + decay * step)
        // Note: `stepSize` and `secondMoments` are split into two lines to avoid the "compiler is
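The hunk ends just before the `stepSize` computation mentioned in the note. For orientation, standard bias-corrected Adam arithmetic looks roughly like the scalar sketch below; this is an assumption about the elided code, not text copied from the diff:

```swift
// Scalar sketch of bias-corrected Adam (illustrative values only).
let learningRate: Float = 1e-3
let beta1: Float = 0.9, beta2: Float = 0.999, epsilon: Float = 1e-8
let beta1Power: Float = 0.349   // beta1^step for step = 10, precomputed for brevity
let beta2Power: Float = 0.990   // beta2^step for step = 10

// Bias-corrected step size, split into two expressions as the note suggests.
let correction = (1 - beta2Power).squareRoot() / (1 - beta1Power)
let stepSize = learningRate * correction

var firstMoment: Float = 0, secondMoment: Float = 0
let g: Float = 0.5                               // one component of `direction`
firstMoment = beta1 * firstMoment + (1 - beta1) * g
secondMoment = beta2 * secondMoment + (1 - beta2) * g * g
let delta = -stepSize * firstMoment / (secondMoment.squareRoot() + epsilon)
```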
@@ -262,8 +230,7 @@ public class Adam<Model: Differentiable>: Optimizer
public class AdaMax<Model: Differentiable & KeyPathIterable>: Optimizer
    where Model.TangentVector: VectorProtocol & PointwiseMultiplicative &
                               ElementaryFunctions & KeyPathIterable,
-         Model.TangentVector.VectorSpaceScalar == Float,
-         Model.AllDifferentiableVariables == Model.TangentVector {
+         Model.TangentVector.VectorSpaceScalar == Float {
    public typealias Model = Model
    /// The learning rate.
    public var learningRate: Float
@@ -304,15 +271,7 @@ public class AdaMax<Model: Differentiable & KeyPathIterable>: Optimizer
    }

    public func update(_ model: inout Model, along direction: Model.TangentVector) {
-       update(&model.allDifferentiableVariables, along: direction)
-   }
-
-   // TODO: Deprecate this when `Differentiable.AllDifferentiableVariables` is removed.
-   public func update(
-       _ model: inout Model.AllDifferentiableVariables,
-       along direction: Model.TangentVector
-   ) {
-       self.step += 1
+       step += 1
        let step = Float(self.step)
        let learningRate = self.learningRate * 1 / (1 + decay * step)
        // Note: `stepSize` is split into two lines to avoid the "compiler is unable to type-check
@@ -323,11 +282,11 @@ public class AdaMax<Model: Differentiable & KeyPathIterable>: Optimizer

        // Update `infinityNorm` using a key path approach because `max(_:_:)` cannot be
        // currently applied in a simpler manner.
-       for kp in model.recursivelyAllWritableKeyPaths(to: Tensor<Float>.self) {
+       for kp in infinityNorm.recursivelyAllWritableKeyPaths(to: Tensor<Float>.self) {
            infinityNorm[keyPath: kp] = max(
                beta2 * infinityNorm[keyPath: kp], abs(direction[keyPath: kp]))
        }
-       for kp in model.recursivelyAllWritableKeyPaths(to: Tensor<Double>.self) {
+       for kp in infinityNorm.recursivelyAllWritableKeyPaths(to: Tensor<Double>.self) {
            infinityNorm[keyPath: kp] = max(
                Double(beta2) * infinityNorm[keyPath: kp], abs(direction[keyPath: kp]))
        }
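The fix iterates the writable key paths of the state being mutated (`infinityNorm`) rather than of `model`. A minimal sketch of the key-path pattern, with a hypothetical two-field state struct standing in for the optimizer state (assumes the Swift for TensorFlow toolchain, which synthesizes the `KeyPathIterable` conformance):

```swift
import TensorFlow

// Hypothetical state struct; its KeyPathIterable conformance is synthesized.
struct State: KeyPathIterable {
    var a = Tensor<Float>([1, -2, 3])
    var b = Tensor<Float>([0.5, 0.5, 0.5])
}

var state = State()
let direction = State(a: Tensor<Float>([4, 1, -5]), b: Tensor<Float>([0, 1, 0]))
let beta2: Float = 0.999

// Elementwise max over every Tensor<Float> stored anywhere in `state`.
for kp in state.recursivelyAllWritableKeyPaths(to: Tensor<Float>.self) {
    state[keyPath: kp] = max(beta2 * state[keyPath: kp], abs(direction[keyPath: kp]))
}
```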
@@ -347,8 +306,7 @@ public class AdaMax<Model: Differentiable & KeyPathIterable>: Optimizer
public class AMSGrad<Model: Differentiable & KeyPathIterable>: Optimizer
    where Model.TangentVector: VectorProtocol & PointwiseMultiplicative &
                               ElementaryFunctions & KeyPathIterable,
-         Model.TangentVector.VectorSpaceScalar == Float,
-         Model.AllDifferentiableVariables == Model.TangentVector {
+         Model.TangentVector.VectorSpaceScalar == Float {
    public typealias Model = Model
    /// The learning rate.
    public var learningRate: Float
@@ -390,15 +348,7 @@ public class AMSGrad<Model: Differentiable & KeyPathIterable>: Optimizer
    }

    public func update(_ model: inout Model, along direction: Model.TangentVector) {
-       update(&model.allDifferentiableVariables, along: direction)
-   }
-
-   // TODO: Deprecate this when `Differentiable.AllDifferentiableVariables` is removed.
-   public func update(
-       _ model: inout Model.AllDifferentiableVariables,
-       along direction: Model.TangentVector
-   ) {
-       self.step += 1
+       step += 1
        let step = Float(self.step)
        let beta1Power = pow(beta1, step)
        let beta2Power = pow(beta2, step)
@@ -413,11 +363,11 @@ public class AMSGrad<Model: Differentiable & KeyPathIterable>: Optimizer

        // Update `secondMomentsMax` using a key path approach because `max(_:_:)` cannot be
        // currently applied in a simpler manner.
-       for kp in model.recursivelyAllWritableKeyPaths(to: Tensor<Float>.self) {
+       for kp in secondMomentsMax.recursivelyAllWritableKeyPaths(to: Tensor<Float>.self) {
            secondMomentsMax[keyPath: kp] = max(
                secondMomentsMax[keyPath: kp], secondMoments[keyPath: kp])
        }
-       for kp in model.recursivelyAllWritableKeyPaths(to: Tensor<Double>.self) {
+       for kp in secondMomentsMax.recursivelyAllWritableKeyPaths(to: Tensor<Double>.self) {
            secondMomentsMax[keyPath: kp] = max(
                secondMomentsMax[keyPath: kp], secondMoments[keyPath: kp])
        }
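AMSGrad's distinguishing step is the bookkeeping shown above: it keeps the running maximum of the second-moment estimate and, in code not shown in this hunk, normalizes with that maximum rather than the current estimate. A scalar sketch of the distinction (illustrative values only):

```swift
// Scalar sketch: AMSGrad keeps the largest second-moment estimate seen so far.
let beta2: Float = 0.999
var secondMoments: Float = 0
var secondMomentsMax: Float = 0

for g in [0.5, 0.1, 0.05] as [Float] {
    secondMoments = beta2 * secondMoments + (1 - beta2) * g * g
    secondMomentsMax = max(secondMomentsMax, secondMoments)  // never decreases
}
// The parameter update then divides by sqrt(secondMomentsMax) rather than sqrt(secondMoments).
```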