I am running this code below:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
"""
This module is a string prediction model using LSTM.
It takes a file of strings composed of digits from 0 to 9 and splits them into input and target sequences.
The input sequence is the first five characters and the target sequence is the last five characters shifted by one position.
The model learns to predict the next character in the sequence given the previous five characters.
"""
VOCAB_SIZE = 10
EMBED_SIZE = 32
RNN_UNITS = 32
BATCH_SIZE = 20
import pandas as pd
file_path = "C:\\Users\\PC-1\\Desktop\\stringpred.txt"
df = pd.read_csv(file_path, header=None)
df = df.apply(lambda x: pd.to_numeric(x.str.replace(" ", "")))
arrays = df.values
def split_sequences(arrays)
input_sequences = []
target_sequences = []
for a in arrays:
input_sequence = a[:-1]
target_sequence = a[1:]
input_sequences.append(input_sequence)
target_sequences.append(target_sequence)
return input_sequences, target_sequences
input_sequences, target_sequences = split_sequences(arrays)
X_train, X_test, y_train, y_test = train_test_split(input_sequences, target_sequences, test_size=0.2, random_state=42)
X_train = np.reshape(X_train, (-1, 5))
y_train = np.reshape(y_train, (-1, 5))
X_test = np.reshape(X_test, (-1, 5))
y_test = np.reshape(y_test, (-1, 5))
X_test = np.pad(X_test, (0, 5 - len(X_test) % 5), mode="constant")
y_test = np.pad(y_test, (0, 5 - len(y_test) % 5), mode="constant")
X_train = np.asarray(X_train, dtype=np.float32)
y_train = np.asarray(y_train, dtype=np.float32)
X_test = np.asarray(X_test, dtype=np.float32)
y_test = np.asarray(y_test, dtype=np.float32)
def generate_string(seed, model, subarrays)
seed_array = np.array([int(c) for c in seed])
output_array = []
for i in range(6):
probs = np.concatenate([model.predict(sub) for sub in subarrays], axis=0)
next_token = np.random.choice(VOCAB_SIZE, p=probs[0, -1])
output_array.append(next_token)
seed_array = np.append(seed_array[1:], next_token)
output_string = "".join(map(str, output_array))
return output_string
df = pd.read_csv(file_path, header=None)
df = df.apply(lambda x: pd.to_numeric(x.str.replace(" ", "")))
arrays = df.values
def split_sequences(arrays)
input_sequences = []
target_sequences = []
for a in arrays:
input_sequence = a[:-1]
target_sequence = a[1:]
input_sequences.append(input_sequence)
target_sequences.append(target_sequence)
return input_sequences, target_sequences
def generate_string(seed, model, subarrays)
seed_array = np.array([int(c) for c in seed])
output_array = []
for i in range(6):
probs = np.concatenate([model.predict(sub) for sub in subarrays], axis=0)
next_token = np.random.choice(VOCAB_SIZE, p=probs[0, -1])
output_array.append(next_token)
seed_array = np.append(seed_array[1:], next_token)
output_string = "".join(map(str, output_array))
return output_string
input_sequences, target_sequences = split_sequences(arrays)
X_train, X_test, y_train, y_test = train_test_split(input_sequences, target_sequences, test_size=0.2, random_state=42)
X_train = np.reshape(X_train, (-1, 5))
y_train = np.reshape(y_train, (-1, 5))
X_test = np.reshape(X_test, (-1, 5))
y_test = np.reshape(y_test, (-1, 5))
X_test = np.pad(X_test, (0, 5 - len(X_test) % 5), mode="constant")
y_test = np.pad(y_test, (0, 5 - len(y_test) % 5), mode="constant")
X_train = np.asarray(X_train, dtype=np.float32)
y_train = np.asarray(y_train, dtype=np.float32)
X_test = np.asarray(X_test, dtype=np.float32)
y_test = np.asarray(y_test, dtype=np.float32)
subarrays = np.array_split(X_test, len(X_test) / 5)
model = keras.Sequential([
layers.Embedding(input_dim=VOCAB_SIZE, output_dim=EMBED_SIZE),
layers.LSTM(units=RNN_UNITS, return_sequences=True),
layers.Dense(units=VOCAB_SIZE, activation="softmax")
])
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam")
model.fit(X_train, y_train, batch_size=BATCH_SIZE, epochs=10)
print(generate_string("55420", model, subarrays))
print(generate_string("13120", model, subarrays))
print(generate_string("25050", model, subarrays))
Initially, I had this recurring error message (for three times) as I ran the code:
Traceback (most recent call last):
File "C:/Users/PC-1/Desktop/String Predict ver03-A-1.py", line 182, in <module>
arrays = read_strings(file_path)
NameError: name 'read_strings' is not defined
That refers to this line here:
arrays = read_strings(file_path)
...so I had that turn into a comment so it won't mess up the execution, then ran the code again.
Now it is giving me this error message:
Epoch 1/10
Traceback (most recent call last):
File "C:/Users/PC-1/Desktop/String Predict ver03-A-1.py", line 223, in <module>
model.fit(X_train, y_train, batch_size=BATCH_SIZE, epochs=10)
File "C:\Users\PC-1\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\utils\traceback_utils.py", line 70, in error_handler
raise e.with_traceback(filtered_tb) from None
File "C:\Users\PC-1\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\training.py", line 1754, in fit
raise ValueError(
ValueError: Unexpected result of `train_function` (Empty logs). This could be due to issues in input pipeline that resulted in an empty dataset. Otherwise, please use `Model.compile(..., run_eagerly=True)`, or `tf.config.run_functions_eagerly(True)` for more information of where went wrong, or file a issue/bug to `tf.keras`.
Am at my wits' end here - can anyone tell me what to fix here?
If it would help clarify my problem, that code is meant to solve this particular programming problem:
Create a Python source code that will predict the next unique string to appear based on a list of six-character strings ranging from 0 to 5 stored in the windows text file, "stringpred.txt". As an example of what the list of strings look like, refer to the section below:
...
5 5 4 2 0 5
5 4 1 4 5 5
4 4 4 2 2 0
1 3 1 2 0 1
1 2 4 4 5 5
3 2 1 4 5 5
5 1 5 2 5 4
0 1 5 5 5 4
3 3 1 5 3 5
5 3 3 4 3 5
0 5 3 3 0 2
3 3 0 3 5 1
5 2 2 5 4 0
3 4 3 5 2 3
4 5 2 3 4 5
3 0 4 4 5 5
2 1 2 4 5 5
4 3 0 0 1 5
4 3 2 2 2 4
2 5 0 5 0 3
3 5 1 3 4 4
...
Format output as..
"The next predicted string will be:
As an example:
3 0 4 4 5 5
2 1 2 4 5 5
4 3 0 0 1 5
4 3 2 2 2 4
2 5 0 5 0 3
The next predicted string will be: 3 5 1 3 4 4
If this is really hard to solve, where other forum site can I go to that can help address this roadblock I ran into?
What I have tried:
Well, I tried turning this line into a comment:
arrays = read_strings(file_path)
...in hopes that it won't mess up the execution. But still, can't get it to run..