@@ -55,14 +55,6 @@ public class RMSProp<Model: Differentiable>: Optimizer
    }

    public func update(_ model: inout Model, along direction: Model.TangentVector) {
-       update(&model.allDifferentiableVariables, along: direction)
-   }
-
-   // TODO: Deprecate this when `Differentiable.AllDifferentiableVariables` is removed.
-   public func update(
-       _ model: inout Model.AllDifferentiableVariables,
-       along direction: Model.TangentVector
-   ) {
        step += 1
        let learningRate = self.learningRate * 1 / (1 + decay * Float(step))
        alpha = alpha * rho + direction .* direction * (1 - rho)
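With the `AllDifferentiableVariables` overload gone, callers go through the single `update(_:along:)` entry point directly. A minimal usage sketch under assumed details (the `Dense` layer, loss, and data here are hypothetical; assumes the Swift for TensorFlow `TensorFlow` module):

```swift
import TensorFlow

// Hypothetical one-layer model, used only to illustrate the call site.
var model = Dense<Float>(inputSize: 4, outputSize: 1)
let optimizer = RMSProp(for: model, learningRate: 1e-3)

let x = Tensor<Float>(randomNormal: [8, 4])
let y = Tensor<Float>(zeros: [8, 1])

// Differentiate a loss with respect to the model and take one optimizer step.
let grad = gradient(at: model) { model -> Tensor<Float> in
    meanSquaredError(predicted: model(x), expected: y)
}
optimizer.update(&model, along: grad)  // formerly update(&model.allDifferentiableVariables, along: grad)
```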
@@ -107,14 +99,6 @@ public class AdaGrad<Model: Differentiable>: Optimizer
    }

    public func update(_ model: inout Model, along direction: Model.TangentVector) {
-       update(&model.allDifferentiableVariables, along: direction)
-   }
-
-   // TODO: Deprecate this when `Differentiable.AllDifferentiableVariables` is removed.
-   public func update(
-       _ model: inout Model.AllDifferentiableVariables,
-       along direction: Model.TangentVector
-   ) {
        alpha = rho + direction .* direction
        let denominator = Model.TangentVector.sqrt(alpha) + epsilon
        model.move(along: -learningRate * direction ./ denominator)
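For reference, the three context lines above can be traced with plain scalars. This is only an illustrative trace of the per-parameter arithmetic (tensor operations replaced by `Float` arithmetic, values chosen arbitrarily), not library code:

```swift
// Scalar trace of the AdaGrad-style update shown in the context lines.
let rho: Float = 0.9, learningRate: Float = 1e-3, epsilon: Float = 1e-8
let g: Float = 0.5                               // one component of `direction`
var alpha: Float = 0
alpha = rho + g * g                              // alpha = rho + direction .* direction
let denominator = alpha.squareRoot() + epsilon   // sqrt(alpha) + epsilon
let delta = -learningRate * g / denominator      // offset applied by model.move(along:)
```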
@@ -166,14 +150,6 @@ public class AdaDelta<Model: Differentiable>: Optimizer
    }

    public func update(_ model: inout Model, along direction: Model.TangentVector) {
-       update(&model.allDifferentiableVariables, along: direction)
-   }
-
-   // TODO: Deprecate this when `Differentiable.AllDifferentiableVariables` is removed.
-   public func update(
-       _ model: inout Model.AllDifferentiableVariables,
-       along direction: Model.TangentVector
-   ) {
        step += 1
        let learningRate = self.learningRate / (1 + decay * Float(step))
        averageSquared = rho * averageSquared + (1 - rho) * direction .* direction
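The decayed learning rate in the first context line is a simple time-based schedule. A small sketch of how it evolves (illustrative values only):

```swift
// Time-based decay of the learning rate, as used in the context line above.
let baseLearningRate: Float = 1.0
let decay: Float = 0.1
for step in 1...5 {
    let learningRate = baseLearningRate / (1 + decay * Float(step))
    print(step, learningRate)  // ~0.909, 0.833, 0.769, 0.714, 0.667
}
```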
@@ -230,15 +206,7 @@ public class Adam<Model: Differentiable>: Optimizer
    }

    public func update(_ model: inout Model, along direction: Model.TangentVector) {
-       update(&model.allDifferentiableVariables, along: direction)
-   }
-
-   // TODO: Deprecate this when `Differentiable.AllDifferentiableVariables` is removed.
-   public func update(
-       _ model: inout Model.AllDifferentiableVariables,
-       along direction: Model.TangentVector
-   ) {
-       self.step += 1
+       step += 1
        let step = Float(self.step)
        let learningRate = self.learningRate * 1 / (1 + decay * step)
        // Note: `stepSize` and `secondMoments` are split into two lines to avoid the "compiler is
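The hunk ends just before the `stepSize` computation mentioned in the note. For orientation, standard bias-corrected Adam arithmetic looks roughly like the scalar sketch below; this is an assumption about the elided code, not text copied from the diff:

```swift
// Scalar sketch of bias-corrected Adam (illustrative values only).
let learningRate: Float = 1e-3
let beta1: Float = 0.9, beta2: Float = 0.999, epsilon: Float = 1e-8
let beta1Power: Float = 0.349   // beta1^step for step = 10, precomputed for brevity
let beta2Power: Float = 0.990   // beta2^step for step = 10

// Bias-corrected step size, split into two expressions as the note suggests.
let correction = (1 - beta2Power).squareRoot() / (1 - beta1Power)
let stepSize = learningRate * correction

var firstMoment: Float = 0, secondMoment: Float = 0
let g: Float = 0.5                               // one component of `direction`
firstMoment = beta1 * firstMoment + (1 - beta1) * g
secondMoment = beta2 * secondMoment + (1 - beta2) * g * g
let delta = -stepSize * firstMoment / (secondMoment.squareRoot() + epsilon)
```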
@@ -262,8 +230,7 @@ public class Adam<Model: Differentiable>: Optimizer
public class AdaMax<Model: Differentiable & KeyPathIterable>: Optimizer
    where Model.TangentVector: VectorProtocol & PointwiseMultiplicative &
                               ElementaryFunctions & KeyPathIterable,
-         Model.TangentVector.VectorSpaceScalar == Float,
-         Model.AllDifferentiableVariables == Model.TangentVector {
+         Model.TangentVector.VectorSpaceScalar == Float {
    public typealias Model = Model
    /// The learning rate.
    public var learningRate: Float
@@ -304,15 +271,7 @@ public class AdaMax<Model: Differentiable & KeyPathIterable>: Optimizer
    }

    public func update(_ model: inout Model, along direction: Model.TangentVector) {
-       update(&model.allDifferentiableVariables, along: direction)
-   }
-
-   // TODO: Deprecate this when `Differentiable.AllDifferentiableVariables` is removed.
-   public func update(
-       _ model: inout Model.AllDifferentiableVariables,
-       along direction: Model.TangentVector
-   ) {
-       self.step += 1
+       step += 1
        let step = Float(self.step)
        let learningRate = self.learningRate * 1 / (1 + decay * step)
        // Note: `stepSize` is split into two lines to avoid the "compiler is unable to type-check
@@ -323,11 +282,11 @@ public class AdaMax<Model: Differentiable & KeyPathIterable>: Optimizer

        // Update `infinityNorm` using a key path approach because `max(_:_:)` cannot be
        // currently applied in a simpler manner.
-       for kp in model.recursivelyAllWritableKeyPaths(to: Tensor<Float>.self) {
+       for kp in infinityNorm.recursivelyAllWritableKeyPaths(to: Tensor<Float>.self) {
            infinityNorm[keyPath: kp] = max(
                beta2 * infinityNorm[keyPath: kp], abs(direction[keyPath: kp]))
        }
-       for kp in model.recursivelyAllWritableKeyPaths(to: Tensor<Double>.self) {
+       for kp in infinityNorm.recursivelyAllWritableKeyPaths(to: Tensor<Double>.self) {
            infinityNorm[keyPath: kp] = max(
                Double(beta2) * infinityNorm[keyPath: kp], abs(direction[keyPath: kp]))
        }
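The fix iterates the writable key paths of the state being mutated (`infinityNorm`) rather than of `model`. A minimal sketch of the key-path pattern, with a hypothetical two-field state struct standing in for the optimizer state (assumes the Swift for TensorFlow toolchain, which synthesizes the `KeyPathIterable` conformance):

```swift
import TensorFlow

// Hypothetical state struct; its KeyPathIterable conformance is synthesized.
struct State: KeyPathIterable {
    var a = Tensor<Float>([1, -2, 3])
    var b = Tensor<Float>([0.5, 0.5, 0.5])
}

var state = State()
let direction = State(a: Tensor<Float>([4, 1, -5]), b: Tensor<Float>([0, 1, 0]))
let beta2: Float = 0.999

// Elementwise max over every Tensor<Float> stored anywhere in `state`.
for kp in state.recursivelyAllWritableKeyPaths(to: Tensor<Float>.self) {
    state[keyPath: kp] = max(beta2 * state[keyPath: kp], abs(direction[keyPath: kp]))
}
```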
@@ -347,8 +306,7 @@ public class AdaMax<Model: Differentiable & KeyPathIterable>: Optimizer
public class AMSGrad<Model: Differentiable & KeyPathIterable>: Optimizer
    where Model.TangentVector: VectorProtocol & PointwiseMultiplicative &
                               ElementaryFunctions & KeyPathIterable,
-         Model.TangentVector.VectorSpaceScalar == Float,
-         Model.AllDifferentiableVariables == Model.TangentVector {
+         Model.TangentVector.VectorSpaceScalar == Float {
    public typealias Model = Model
    /// The learning rate.
    public var learningRate: Float
@@ -390,15 +348,7 @@ public class AMSGrad<Model: Differentiable & KeyPathIterable>: Optimizer
    }

    public func update(_ model: inout Model, along direction: Model.TangentVector) {
-       update(&model.allDifferentiableVariables, along: direction)
-   }
-
-   // TODO: Deprecate this when `Differentiable.AllDifferentiableVariables` is removed.
-   public func update(
-       _ model: inout Model.AllDifferentiableVariables,
-       along direction: Model.TangentVector
-   ) {
-       self.step += 1
+       step += 1
        let step = Float(self.step)
        let beta1Power = pow(beta1, step)
        let beta2Power = pow(beta2, step)
@@ -413,11 +363,11 @@ public class AMSGrad<Model: Differentiable & KeyPathIterable>: Optimizer

        // Update `secondMomentsMax` using a key path approach because `max(_:_:)` cannot be
        // currently applied in a simpler manner.
-       for kp in model.recursivelyAllWritableKeyPaths(to: Tensor<Float>.self) {
+       for kp in secondMomentsMax.recursivelyAllWritableKeyPaths(to: Tensor<Float>.self) {
            secondMomentsMax[keyPath: kp] = max(
                secondMomentsMax[keyPath: kp], secondMoments[keyPath: kp])
        }
-       for kp in model.recursivelyAllWritableKeyPaths(to: Tensor<Double>.self) {
+       for kp in secondMomentsMax.recursivelyAllWritableKeyPaths(to: Tensor<Double>.self) {
            secondMomentsMax[keyPath: kp] = max(
                secondMomentsMax[keyPath: kp], secondMoments[keyPath: kp])
        }
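AMSGrad's distinguishing step is the bookkeeping shown above: it keeps the running maximum of the second-moment estimate and, in code not shown in this hunk, normalizes with that maximum rather than the current estimate. A scalar sketch of the distinction (illustrative values only):

```swift
// Scalar sketch: AMSGrad keeps the largest second-moment estimate seen so far.
let beta2: Float = 0.999
var secondMoments: Float = 0
var secondMomentsMax: Float = 0

for g in [0.5, 0.1, 0.05] as [Float] {
    secondMoments = beta2 * secondMoments + (1 - beta2) * g * g
    secondMomentsMax = max(secondMomentsMax, secondMoments)  // never decreases
}
// The parameter update then divides by sqrt(secondMomentsMax) rather than sqrt(secondMoments).
```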