<?xml version="1.0" encoding="UTF-8"?>
<doi_batch version="4.3.0" xmlns="http://www.crossref.org/doi_resources_schema/4.3.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.crossref.org/doi_resources_schema/4.3.0 http://www.crossref.org/schema/deposit/doi_resources4.3.0.xsd">
<head>
  <!-- Batch header: the identifier of this deposit and the depositor's contact details. -->
  <doi_batch_id>ccff4444-ad21-442b-bb61-b21423fca6aa</doi_batch_id>
  <depositor>
    <name>beie</name>
    <email_address>director@blueeyesintelligence.org</email_address>
  </depositor>
</head>
<body>
<doi_citations>
  <!--
    Reference list deposited for the DOI given directly below.
    Each <citation> has a unique key (ref0 to ref20), an optional <doi> for
    the cited work, and the free-text reference in <unstructured_citation>.
    Keep every <unstructured_citation> on one physical line: whitespace inside
    it is part of the citation text, so wrapping or indenting would alter the data.
  -->
  <doi>10.35940/ijitee.B8259.0210421</doi>
  <citation_list>
    <citation key="ref0">
      <doi>10.1007/978-3-7908-2604-3_16</doi>
      <unstructured_citation>Bottou, Léon. &quot;Large-scale machine learning with stochastic gradient descent.&quot; Proceedings of COMPSTAT'2010. Physica-Verlag HD, 2010. 177-186.</unstructured_citation>
    </citation>
    <citation key="ref1">
      <unstructured_citation>Buduma, Nikhil, and Nicholas Locascio. &quot;Fundamentals of deep learning: designing next-generation machine intelligence algorithms.&quot; O'Reilly Media, Inc., 2017.</unstructured_citation>
    </citation>
    <citation key="ref2">
      <unstructured_citation>Lau, Suki. &quot;Learning rate schedules and adaptive learning rate methods for deep learning.&quot; Towards Data Science 2017.</unstructured_citation>
    </citation>
    <citation key="ref3">
      <unstructured_citation>Goodfellow, I., Y. Bengio, and A. Courville. &quot;Deep learning, series, the adaptive computation and machine learning series.&quot; 2016.</unstructured_citation>
    </citation>
    <citation key="ref4">
      <unstructured_citation>Some state of the art optimizers in neural networks, https://hackernoon.com/some-state-of-the-art-optimizers-in-neural-networks-a3c2ba5a5643, on 9/2020.</unstructured_citation>
    </citation>
    <citation key="ref5">
      <unstructured_citation>&quot;An overview of gradient descent optimization algorithms&quot;, https://ruder.io/optimizing-gradient-descent/, on 9/2020.</unstructured_citation>
    </citation>
    <citation key="ref6">
      <unstructured_citation>Visa, Sofia, et al. &quot;Confusion Matrix-based Feature Selection.&quot; MAICS 710 (2011): 120-127.</unstructured_citation>
    </citation>
    <citation key="ref7">
      <unstructured_citation>&quot;Learning rate schedules and adaptive learning rate methods for deep learning,&quot; https://towardsdatascience.com/learning-rate-schedules-and-adaptive-learning-rate-methods-for-deep-learning-2c8f433990d1, on 9/2020.</unstructured_citation>
    </citation>
    <citation key="ref8">
      <unstructured_citation>Christian Daniel, Jonathan Taylor, and Sebastian Nowozin. Learning step size controllers for robust neural network training. In Thirtieth AAAI Conference on Artificial Intelligence, 2016.</unstructured_citation>
    </citation>
    <citation key="ref9">
      <unstructured_citation>Smith, Samuel L., et al. &quot;Don't decay the learning rate, increase the batch size.&quot; arXiv preprint arXiv:1711.00489 2017.</unstructured_citation>
    </citation>
    <citation key="ref10">
      <unstructured_citation>Clevert, Djork-Arné, Thomas Unterthiner, and Sepp Hochreiter. &quot;Fast and accurate deep network learning by exponential linear units (elus).&quot; arXiv preprint arXiv:1511.07289 2015.</unstructured_citation>
    </citation>
    <citation key="ref11">
      <unstructured_citation>Zeiler, Matthew D. &quot;Adadelta: an adaptive learning rate method.&quot; arXiv preprint arXiv:1212.5701 2012.</unstructured_citation>
    </citation>
    <citation key="ref12">
      <unstructured_citation>Sutskever, Ilya, et al. &quot;On the importance of initialization and momentum in deep learning.&quot; International conference on machine learning. 2013.</unstructured_citation>
    </citation>
    <citation key="ref13">
      <doi>10.3390/sym12040660</doi>
      <unstructured_citation>Park, Jieun, Dokkyun Yi, and Sangmin Ji. &quot;A novel learning rate schedule in optimization for neural networks and its convergence.&quot; Symmetry 12.4 2020: 660.</unstructured_citation>
    </citation>
    <citation key="ref14">
      <unstructured_citation>Ruder, Sebastian. &quot;An overview of gradient descent optimization algorithms.&quot; arXiv preprint arXiv:1609.04747 2016.</unstructured_citation>
    </citation>
    <citation key="ref15">
      <doi>10.1007/978-3-319-18038-0_35</doi>
      <unstructured_citation>Chin, Wei-Sheng, et al. &quot;A learning-rate schedule for stochastic gradient methods to matrix factorization.&quot; Pacific-Asia Conference on Knowledge Discovery and Data Mining. Springer, Cham, 2015.</unstructured_citation>
    </citation>
    <citation key="ref16">
      <unstructured_citation>Zulkifli, Hafidz. &quot;Understanding learning rates and how it improves performance in deep learning.&quot; Software testing fundamentals 2018.</unstructured_citation>
    </citation>
    <citation key="ref17">
      <unstructured_citation>Dauphin, Yann N., et al. &quot;Identifying and attacking the saddle point problem in high-dimensional non-convex optimization.&quot; Advances in neural information processing systems. 2014.</unstructured_citation>
    </citation>
    <citation key="ref18">
      <unstructured_citation>Zhang, Sixin, Anna E. Choromanska, and Yann LeCun. &quot;Deep learning with elastic averaging SGD.&quot; Advances in neural information processing systems. 2015.</unstructured_citation>
    </citation>
    <citation key="ref19">
      <unstructured_citation>Dozat, Timothy. &quot;Incorporating nesterov momentum into Adam.&quot; 2016.</unstructured_citation>
    </citation>
    <citation key="ref20">
      <unstructured_citation>Brownlee, J. &quot;Using learning rate schedules for deep learning models in python with keras.&quot; (2016).</unstructured_citation>
    </citation>
  </citation_list>
</doi_citations>
</body>
</doi_batch>
