[인공지능 #4] 여러 개의 DATA(X 변수)를 사용하는 다변수 선형 회귀 구현
인공지능 구현에 대한 글입니다.
글의 순서는 아래와 같습니다.
================================================
1. X의 개수가 3개인 경우
==> 변수마다 따로 코딩해야 해서 코드가 매우 복잡해짐. 대안으로 "매트릭스(행렬)"를 사용하면 간단해짐.
2. 매트릭스 적용
==> DATA 개수(X 변수의 개수)가 많아져도 간단하게 코딩이 가능해짐
3. TensorFlow로 파일에서 DATA 읽어오기
==> DATA를 프로그램 안에 직접 넣으면 메모리 한계가 있으므로, DATA를 별도의 파일에 저장하고 불러오는 것이 유리함
4. DATA를 QUEUE로 읽어들여 시간차를 두고 처리해서 시스템 부하를 줄여줌
==> 파일이 많거나 용량이 커져도 TensorFlow가 자동으로 시간차를 두고(배치 단위로) 처리해 주는 시스템을 구현할 수 있음
5. Next step
==> LOGISTIC REGRESSION 알고리즘 : CLASSIFICATION의 주요 알고리즘.
6.참고자료
=================================================
[ X의 개수가 3개인 경우 : 소스코드 분석 ]
# Multi-variable linear regression with three separately declared inputs.
# NOTE: written against the TensorFlow 1.x graph/session API
# (tf.placeholder, tf.Session, tf.train.*).
import os

# Set before importing TensorFlow; presumably quiets its C++ log output.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Lab 4 Multi-variable linear regression
import tensorflow as tf

tf.set_random_seed(777)  # for reproducibility

# Three x variables (x1, x2, x3), five samples each, and one target y.
x1_data = [73., 93., 89., 96., 73.]
x2_data = [80., 88., 91., 98., 66.]
x3_data = [75., 93., 90., 100., 70.]
y_data = [152., 185., 180., 196., 142.]

# placeholders for a tensor that will be always fed.
x1 = tf.placeholder(tf.float32)
x2 = tf.placeholder(tf.float32)
x3 = tf.placeholder(tf.float32)
Y = tf.placeholder(tf.float32)

# One weight per input variable plus a shared bias.
w1 = tf.Variable(tf.random_normal([1]), name='weight1')
w2 = tf.Variable(tf.random_normal([1]), name='weight2')
w3 = tf.Variable(tf.random_normal([1]), name='weight3')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Hypothesis with three inputs and three weights (x1..x3, w1..w3).
# Writing every term out by hand gets unwieldy as the number of variables
# grows; a matrix formulation keeps the code short.
hypothesis = x1 * w1 + x2 * w2 + x3 * w3 + b
print(hypothesis)

# cost/loss function
cost = tf.reduce_mean(tf.square(hypothesis - Y))

# Minimize. Need a very small learning rate for this data set
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
train = optimizer.minimize(cost)

# Launch the graph in a session.
sess = tf.Session()
# Initializes global variables in the graph.
sess.run(tf.global_variables_initializer())

for step in range(2001):
    # Run one training step and fetch the current cost and predictions.
    cost_val, hy_val, _ = sess.run(
        [cost, hypothesis, train],
        feed_dict={x1: x1_data, x2: x2_data, x3: x3_data, Y: y_data})
    if step % 10 == 0:
        print(step, "Cost: ", cost_val, "\nPrediction:\n", hy_val)

# Release the resources held by the session.
sess.close()
'''
0 Cost: 19614.8
Prediction:
[ 21.69748688 39.10213089 31.82624626 35.14236832 32.55316544]
10 Cost: 14.0682
Prediction:
[ 145.56100464 187.94958496 178.50236511 194.86721802 146.08096313]
...
1990 Cost: 4.9197
Prediction:
[ 148.15084839 186.88632202 179.6293335 195.81796265 144.46044922]
2000 Cost: 4.89449
Prediction:
[ 148.15931702 186.8805542 179.63194275 195.81971741 144.45298767]
'''
[ 매트릭스 적용 ]
# Multi-variable linear regression expressed as a matrix multiplication.
# NOTE: written against the TensorFlow 1.x graph/session API
# (tf.placeholder, tf.Session, tf.train.*).
import os

# Set before importing TensorFlow; presumably quiets its C++ log output.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Lab 4 Multi-variable linear regression
import tensorflow as tf

tf.set_random_seed(777)  # for reproducibility

# Data laid out as matrices: each row of x_data is one sample with three
# features, and each row of y_data is the matching target.
x_data = [
    [73., 80., 75.],
    [93., 88., 93.],
    [89., 91., 90.],
    [96., 98., 100.],
    [73., 66., 70.],
]
y_data = [
    [152.],
    [185.],
    [180.],
    [196.],
    [142.],
]

# placeholders for a tensor that will be always fed.
# None leaves the number of rows (samples) unspecified.
X = tf.placeholder(tf.float32, shape=[None, 3])
Y = tf.placeholder(tf.float32, shape=[None, 1])

W = tf.Variable(tf.random_normal([3, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Hypothesis, written as a single matrix multiplication: [None, 3] x [3, 1].
hypothesis = tf.matmul(X, W) + b

# Simplified cost/loss function
cost = tf.reduce_mean(tf.square(hypothesis - Y))

# Minimize
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
train = optimizer.minimize(cost)

# Launch the graph in a session.
sess = tf.Session()
# Initializes global variables in the graph.
sess.run(tf.global_variables_initializer())

for step in range(2001):
    # Run one training step and fetch the current cost and predictions.
    cost_val, hy_val, _ = sess.run(
        [cost, hypothesis, train], feed_dict={X: x_data, Y: y_data})
    if step % 10 == 0:
        print(step, "Cost: ", cost_val, "\nPrediction:\n", hy_val)

# Release the resources held by the session.
sess.close()
'''
0 Cost: 7105.46
Prediction:
[[ 80.82241058]
[ 92.26364136]
[ 93.70250702]
[ 98.09217834]
[ 72.51759338]]
10 Cost: 5.89726
Prediction:
[[ 155.35159302]
[ 181.85691833]
[ 181.97254944]
[ 194.21760559]
[ 140.85707092]]
...
1990 Cost: 3.18588
Prediction:
[[ 154.36352539]
[ 182.94833374]
[ 181.85189819]
[ 194.35585022]
[ 142.03240967]]
2000 Cost: 3.1781
Prediction:
[[ 154.35881042]
[ 182.95147705]
[ 181.85035706]
[ 194.35533142]
[ 142.036026 ]]
'''
[ TensorFlow로 파일에서 DATA 읽어오기 ]
# Multi-variable linear regression with the training data loaded from a CSV
# file through NumPy.
# NOTE: written against the TensorFlow 1.x graph/session API
# (tf.placeholder, tf.Session, tf.train.*).
import os

# Set before importing TensorFlow; presumably quiets its C++ log output.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Lab 4 Multi-variable linear regression
import tensorflow as tf
import numpy as np

tf.set_random_seed(777)  # for reproducibility

# The data is stored in a CSV file. The path is relative, so it is resolved
# against the current working directory (run the script from the directory
# that contains the CSV, e.g. the one holding this .py file).
xy = np.loadtxt('data-01-test-score.csv', delimiter=',', dtype=np.float32)

# Slicing (worth getting familiar with):
#   xy[:, 0:-1] -> every row, all columns except the last (the features)
#   xy[:, [-1]] -> every row, only the last column; indexing with a list
#                  keeps the result two-dimensional (the targets)
x_data = xy[:, 0:-1]
y_data = xy[:, [-1]]

# Make sure the shape and data are OK
print(x_data.shape, x_data, len(x_data))
print(y_data.shape, y_data)

# placeholders for a tensor that will be always fed.
X = tf.placeholder(tf.float32, shape=[None, 3])
Y = tf.placeholder(tf.float32, shape=[None, 1])

W = tf.Variable(tf.random_normal([3, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Hypothesis
hypothesis = tf.matmul(X, W) + b

# Simplified cost/loss function
cost = tf.reduce_mean(tf.square(hypothesis - Y))

# Minimize
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
train = optimizer.minimize(cost)

# Launch the graph in a session.
sess = tf.Session()
# Initializes global variables in the graph.
sess.run(tf.global_variables_initializer())

for step in range(2001):
    # Run one training step and fetch the current cost and predictions.
    cost_val, hy_val, _ = sess.run(
        [cost, hypothesis, train], feed_dict={X: x_data, Y: y_data})
    if step % 10 == 0:
        print(step, "Cost: ", cost_val, "\nPrediction:\n", hy_val)

# Ask my score: predict the result for a single new sample.
print("Your score will be ", sess.run(
    hypothesis, feed_dict={X: [[100, 70, 101]]}))

# Predict the results for several other samples in one call.
print("Other scores will be ", sess.run(
    hypothesis, feed_dict={X: [[60, 70, 110], [90, 100, 80]]}))

# Release the resources held by the session.
sess.close()
'''
타인의 점수 예측
Your score will be [[ 181.73277283]]
Other scores will be [[ 145.86265564]
[ 187.23129272]]
'''
[ DATA를 QUEUE로 읽어들여 시간차를 두고 처리해서 시스템 부하를 줄여줌 ]
# Multi-variable linear regression fed by a TensorFlow input queue, reading
# the CSV file in batches instead of loading it all at once.
# NOTE: written against the TensorFlow 1.x graph/session API
# (tf.train.string_input_producer, tf.TextLineReader, tf.train.batch).
import tensorflow as tf

tf.set_random_seed(777)  # for reproducibility

# Build a filename queue that holds the single CSV file.
filename_queue = tf.train.string_input_producer(
    ['data-01-test-score.csv'], shuffle=False, name='filename_queue')

# Define the file reader.
reader = tf.TextLineReader()
key, value = reader.read(filename_queue)

# Default values, in case of empty columns. Also specifies the type of the
# decoded result.
record_defaults = [[0.], [0.], [0.], [0.]]
# Decode each line read from the file as CSV (four float columns).
xy = tf.decode_csv(value, record_defaults=record_defaults)

# collect batches of csv in
# Each fetch yields 10 records (batch_size=10): all columns but the last as
# x, and the last column as y.
train_x_batch, train_y_batch = \
    tf.train.batch([xy[0:-1], xy[-1:]], batch_size=10)

# placeholders for a tensor that will be always fed.
# Shapes match the data: three x columns and one y column.
X = tf.placeholder(tf.float32, shape=[None, 3])
Y = tf.placeholder(tf.float32, shape=[None, 1])

W = tf.Variable(tf.random_normal([3, 1]), name='weight')
b = tf.Variable(tf.random_normal([1]), name='bias')

# Hypothesis
hypothesis = tf.matmul(X, W) + b

# Simplified cost/loss function
cost = tf.reduce_mean(tf.square(hypothesis - Y))

# Minimize
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
train = optimizer.minimize(cost)

# Launch the graph in a session.
sess = tf.Session()
# Initializes global variables in the graph.
sess.run(tf.global_variables_initializer())

# Start populating the filename queue.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)

for step in range(2001):
    # Fetch the next batch, then pass it to the training step via feed_dict.
    x_batch, y_batch = sess.run([train_x_batch, train_y_batch])
    cost_val, hy_val, _ = sess.run(
        [cost, hypothesis, train], feed_dict={X: x_batch, Y: y_batch})
    if step % 10 == 0:
        print(step, "Cost: ", cost_val, "\nPrediction:\n", hy_val)

# Stop the queue-runner threads only after training has finished; stopping
# them inside the loop would break the next batch fetch.
coord.request_stop()
coord.join(threads)

# Ask my score
print("Your score will be ",
      sess.run(hypothesis, feed_dict={X: [[100, 70, 101]]}))

print("Other scores will be ",
      sess.run(hypothesis, feed_dict={X: [[60, 70, 110], [90, 100, 80]]}))

# Release the resources held by the session.
sess.close()
'''
Your score will be [[ 177.78144836]]
Other scores will be [[ 141.10997009]
[ 191.17378235]]
'''
[참고자료 ]
https://www.inflearn.com/course/기본적인-머신러닝-딥러닝-강좌/
http://agiantmind.tistory.com/176
https://www.tensorflow.org/install/
https://github.com/hunkim/deeplearningzerotoall
http://www.derivative-calculator.net/
http://terms.naver.com/entry.nhn?docId=3350391&cid=58247&categoryId=58247 ==> 미분계산/공식
http://matplotlib.org/users/installing.html ==>matplotlib 설치