Update README.md
Browse files
README.md
CHANGED
@@ -1,3 +1,73 @@
|
|
1 |
---
|
2 |
license: mit
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
license: mit
|
3 |
+
datasets:
|
4 |
+
- gretelai/synthetic_text_to_sql
|
5 |
+
language:
|
6 |
+
- en
|
7 |
+
library_name: transformers
|
8 |
+
pipeline_tag: text2text-generation
|
9 |
---
|
10 |
+
# Gemma 2B Fine-Tuned SQL Generator
|
11 |
+
|
12 |
+
## Introduction
|
13 |
+
The Gemma 2B SQL Generator is a specialized version of the Gemma 2B model, fine-tuned to generate SQL queries based on a given SQL context. This model has been tailored to assist developers and analysts in generating accurate SQL queries automatically, enhancing productivity and reducing the scope for errors.
|
14 |
+
|
15 |
+
## Model Details
|
16 |
+
- **Model Type:** Gemma 2B
|
17 |
+
- **Fine-Tuning Details:** The model was fine-tuned specifically for generating SQL queries.
|
18 |
+
- **Training Loss:** Achieved a training loss of 0.3, indicating a high level of accuracy in SQL query generation.
|
19 |
+
|
20 |
+
## Installation
|
21 |
+
To set up the necessary environment for using the SQL Generator, run the following commands:
|
22 |
+
```bash
|
23 |
+
pip install torch
|
24 |
+
pip install transformers
|
25 |
+
```
|
26 |
+
|
27 |
+
## Inference
|
28 |
+
|
29 |
+
```python
|
30 |
+
prompt_template = """
|
31 |
+
<start_of_turn>user
|
32 |
+
You are an intelligent AI specialized in generating SQL queries. Your task is to assist users in formulating SQL queries to retrieve specific information from a database. Please provide the SQL query corresponding to the given prompt and context:
|
33 |
+
|
34 |
+
Prompt:
|
35 |
+
find the price of laptop
|
36 |
+
|
37 |
+
Context:
|
38 |
+
CREATE TABLE products (
|
39 |
+
product_id INT,
|
40 |
+
product_name VARCHAR(100),
|
41 |
+
category VARCHAR(50),
|
42 |
+
price DECIMAL(10, 2),
|
43 |
+
stock_quantity INT
|
44 |
+
);
|
45 |
+
|
46 |
+
INSERT INTO products (product_id, product_name, category, price, stock_quantity)
|
47 |
+
VALUES
|
48 |
+
(1, 'Smartphone', 'Electronics', 599.99, 100),
|
49 |
+
(2, 'Laptop', 'Electronics', 999.99, 50),
|
50 |
+
(3, 'Headphones', 'Electronics', 99.99, 200),
|
51 |
+
(4, 'T-shirt', 'Apparel', 19.99, 300),
|
52 |
+
(5, 'Jeans', 'Apparel', 49.99, 150);<end_of_turn>
|
53 |
+
<start_of_turn>model
|
54 |
+
"""
|
55 |
+
|
56 |
+
prompt = prompt_template
|
57 |
+
encodeds = tokenizer(prompt, return_tensors="pt", add_special_tokens=True).input_ids
|
58 |
+
|
59 |
+
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
60 |
+
model.to(device)
|
61 |
+
inputs = encodeds.to(device)
|
62 |
+
|
63 |
+
|
64 |
+
# Increase max_new_tokens if needed
|
65 |
+
generated_ids = model.generate(inputs, max_new_tokens=1000, do_sample=True, temperature=0.7, pad_token_id=tokenizer.eos_token_id)
|
66 |
+
ans = ''
|
67 |
+
for i in tokenizer.decode(generated_ids[0], skip_special_tokens=True).split('<end_of_turn>')[:2]:
|
68 |
+
ans += i
|
69 |
+
|
70 |
+
# Extract only the model's answer
|
71 |
+
model_answer = ans.split("model")[1].strip()
|
72 |
+
print(model_answer)
|
73 |
+
```
|