gofeco committed on
Commit
2ce8f2c
1 Parent(s): b7c38f0

Upload nand.py

Browse files
Files changed (1) hide show
  1. nand.py +349 -0
nand.py ADDED
@@ -0,0 +1,349 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from huggingface_hub import InferenceClient
2
+ from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
3
+ from langchain_community.vectorstores import Chroma
4
+ from transformers import pipeline
5
+ from sentence_transformers.cross_encoder import CrossEncoder
6
+ import re
7
+
8
def setupDB(domain, hasLLM):
    """Build the parameter dictionary consumed by ``process_query``.

    Loads the cross-encoder that ``process_query`` uses to score a query
    against "write a perl ECO script", and opens the Chroma stores for
    ``domain`` and for the ``"insts"`` corpus.

    Args:
        domain: Key into the table returned by ``nandState`` (for example
            ``'en'`` or ``'ch'``); selects the support-document store.
        hasLLM: Stored unchanged under ``'hasLLM'``; ``process_query`` tests
            it to decide whether to call the LLM.

    Returns:
        dict with the keys ``history``, ``disnum``, ``domain``,
        ``crossmodel``, ``insts_db``, ``support_db``, ``pdf_dbs`` and
        ``hasLLM``.
    """
    crossmodel = CrossEncoder("cross-encoder/stsb-distilroberta-base")
    # nandGetChroma looks up the state table itself, so no separate
    # nandState() call is needed here.
    support_db = nandGetChroma(domain)
    insts_db = nandGetChroma("insts")

    pdf_dbs = []
    if domain == 'en':
        # PDF stores are currently switched off; the names that used to be
        # listed here were "pdf_0em", "pdf_1em", "pdf_2em", "pdf_3em",
        # "pdf_4em".
        pdfs = []
        for onepdf in pdfs:
            pdf_dbs.append(nandGetChroma(onepdf))

    return {
        # Slot 0 receives the last query and slot 1 the last LLM answer
        # (both written by process_query).
        'history': ["", ""],
        # Maximum number of "Return N" entries process_query emits.
        'disnum': 10,
        'domain': domain,
        'crossmodel': crossmodel,
        'insts_db': insts_db,
        'support_db': support_db,
        'pdf_dbs': pdf_dbs,
        'hasLLM': hasLLM,
    }
35
def remapScore(domain, inscore):
    """Convert a raw search score into an integer display score.

    The input is flipped to ``x = 1 - inscore`` and passed through the
    quadratic ``a*x*x + b*x``; the result is scaled by 100 and truncated
    with ``int``.

    Args:
        domain: ``'ch'`` selects the coefficients ``(-0.2, 1.2)``; any other
            value selects ``(-1.2, 2.2)``.
        inscore: Numeric score to remap.

    Returns:
        int: ``int((a*x*x + b*x) * 100)``.
    """
    if domain == 'ch':
        quad, lin = -0.2, 1.2
    else:
        quad, lin = -1.2, 2.2
    flipped = 1 - inscore
    # Keep the original left-to-right evaluation order so float rounding
    # is unchanged.
    curve = quad * flipped * flipped + lin * flipped
    return int(curve * 100)
48
+
49
def process_query(iniquery, para):
    """Answer one user query from the vector stores and, optionally, the LLM.

    Steps:
      1. Strip ``<br>`` from the query and run it through ``toEn``.
      2. Score the query against "write a perl ECO script" with the
         cross-encoder; above the threshold, collect the API files that
         correspond to the best-matching instruction documents.
      3. Search the support store (and every PDF store, whose scores get a
         0.2 penalty), sort ascending by score and format the hits as
         ``Return N (score) prefix content`` lines.
      4. When ``para['hasLLM']`` is truthy, build a prompt from the script
         examples, the API files and the formatted hits, call
         ``llmGenerate`` and record query/answer in ``para['history']``.

    Args:
        iniquery: Raw query text.
        para: Dictionary as produced by ``setupDB``; the keys ``disnum``,
            ``domain``, ``history``, ``crossmodel``, ``insts_db``,
            ``support_db``, ``pdf_dbs`` and ``hasLLM`` are read.

    Returns:
        str: Without an LLM, the formatted search hits. With an LLM,
        ``"LLM_OUTPUT_START:" + answer + "\\nEND OF LLM OUTPUT\\n" + prompt``.
    """
    # Local import: this function needs os.path.getsize, and the module's
    # import block does not bring ``os`` into scope.
    import os

    query = re.sub("<br>", "", iniquery)
    ch2en, query = toEn(query)
    if ch2en:
        print(f"Received from connected users : {query}")
    else:
        print(f"Received from connected users : {query}", end='')
    disnum = para['disnum']
    domain = para['domain']
    history = para['history']
    crossmodel = para['crossmodel']
    insts_db = para['insts_db']
    support_db = para['support_db']
    pdf_dbs = para['pdf_dbs']
    hasLLM = para['hasLLM']
    ret = ""

    needScriptScores = crossmodel.predict([["write a perl ECO script", query]])
    print(f"THE QUERY SCORE for creating eco script: score={needScriptScores[0]}")
    allapis = []
    threshold = 0.45
    if needScriptScores[0] > threshold:
        print(f"THE QUERY REQUIRES CREATING AN ECO SCRIPT score={needScriptScores[0]} > {threshold}")
        retinsts = insts_db.similarity_search_with_score(query, k=10)
        accu = 0
        for instdoc, instscore in retinsts:
            instname = instdoc.metadata['source']
            # Map the instruction file name to its API file: swap the
            # leading "insts" for "src_en" and drop every ".<digits>" part.
            otherfile = re.sub(r"^insts", "src_en", instname)
            otherfile = re.sub(r"\.\d+", "", otherfile)
            if otherfile not in allapis:
                allapis.append(otherfile)
                apisize = os.path.getsize(otherfile)
                accu += apisize
                print(f"INST: {instname} SCORE: {instscore} API-size: {apisize} Accu: {accu}")

    results = [
        [doc, score]
        for doc, score in support_db.similarity_search_with_score(query, k=8)
    ]
    for onepdfdb in pdf_dbs:
        # PDF hits are penalised by 0.2 so they sort after support hits of
        # equal raw score.
        results.extend(
            [doc, score + 0.2]
            for doc, score in onepdfdb.similarity_search_with_score(query, k=8)
        )
    results.sort(key=lambda x: x[1])

    index = 1
    for doc, rawscore in results:
        source = doc.metadata['source']
        if source in allapis:
            # The full API file is added to the LLM context below, so the
            # search snippet is skipped here.
            print(f"dont use path={source}, it's in instruction list")
            continue
        is_pdf = re.search(r"\.pdf$", source)
        prefix = "Help:"
        if re.search(r"api\.", source):
            prefix = "API:"
        elif re.search(r"man\.", source):
            prefix = "Manual:"
        elif is_pdf:
            prefix = "PDF:"
        score = remapScore(domain, rawscore)
        retcont = doc.page_content
        if is_pdf:
            page = doc.metadata['page'] + 1
            subpage = doc.metadata['subpage']
            retcont += f"\n<a target='_blank' href='/AI/{source}#page={page}'>PDF{page} {subpage}</a>\n"
        ret += f"Return {index} ({score}) {prefix} {retcont}\n"
        # Stop once the text exceeds 6000 characters or disnum entries exist.
        if len(ret) > 6000:
            break
        index += 1
        if index > disnum:
            break

    if not hasLLM:
        return ret

    context = "Context information is below\n---------------------\n"
    if allapis:
        context += scriptExamples()
        for oneapi in allapis:
            cont = GetContent(oneapi)
            cont = re.sub("</h3>", " API Detail:", cont)
            cont = re.sub('<.*?>', '', cont)  # strip remaining tags
            # Drop everything from "Examples:" to the end of the file.
            cont = re.sub('Examples:.*', '', cont, flags=re.DOTALL)
            context += cont
    context += ret
    prompt = f"{context}\n"
    prompt += "------------------------------------------\n"
    if allapis:
        # NOTE(review): "creat" is a typo in the prompt; left untouched so
        # the text sent to the model does not change.
        prompt += "Given the context information and not prior knowledge, creat a Perl ECO script by following the format and sequence in the script examples provided above.\n"
    else:
        prompt += "Given the context information and not prior knowledge, answer the query.\n"
    prompt += f"Query: {query}\n"

    llmout = llmGenerate(prompt)
    history[0] = query
    history[1] = llmout
    outlen = len(llmout)
    prolen = len(prompt)
    print(f"Prompt len: {prolen} LLMOUT len: {outlen}")
    return "LLM_OUTPUT_START:" + llmout + "\nEND OF LLM OUTPUT\n" + prompt
155
+
156
def toEn(intxt):
    """Translate ``intxt`` to English when it contains CJK ideographs.

    Text without any character in ``U+4E00``–``U+9FFF`` is returned
    untouched. Otherwise it is run through the
    ``Helsinki-NLP/opus-mt-zh-en`` translation pipeline and a fixed list of
    substitutions is applied to the result.

    Args:
        intxt: Text to inspect.

    Returns:
        tuple: ``(1, translated_text)`` when a translation took place,
        ``(0, intxt)`` otherwise.
    """
    if re.search(r'[\u4e00-\u9fff]+', intxt) is None:
        return 0, intxt

    zh_to_en = pipeline(task="translation", model="Helsinki-NLP/opus-mt-zh-en")
    translated = zh_to_en(intxt, max_length=500)[0]['translation_text']
    # Applied in this order; each entry is (regex, replacement).
    fixups = (
        ("ECO foot", "ECO Script"),
        ("web-based", "netlist"),
        (r"\bweb\b", "netlist"),
        (r"\bwebsheet\b", "netlist"),
        (r"\bweblists?\b", "netlist"),
    )
    for wrong, right in fixups:
        translated = re.sub(wrong, right, translated)
    print(f"AFTER RESULT: {translated}")
    return 1, translated
169
+
170
+
171
+
172
def nandGetChroma(domain):
    """Open the Chroma store registered for ``domain``.

    The persist directory and the embedding model are both looked up in
    the tables returned by ``nandState``.

    Args:
        domain: Key of the state table, e.g. ``'en'``, ``'ch'``, ``'insts'``
            or ``'pdf_<n>em'``.

    Returns:
        A ``Chroma`` instance bound to the domain's persist directory and a
        ``SentenceTransformerEmbeddings`` embedding function.

    Raises:
        KeyError: If ``domain`` is not a key of the state table.
    """
    model_names, registry = nandState()
    entry = registry[domain]
    persist_dir = entry['chroma']
    print(f"domain: {domain} has chroma dir {persist_dir}")
    embedder = SentenceTransformerEmbeddings(model_name=model_names[entry['model']])
    return Chroma(persist_directory=persist_dir, embedding_function=embedder)
181
def nandState():
    """Return the embedding-model table and the per-domain state table.

    Returns:
        tuple: ``(models, allState)``.

        ``models`` maps a short model key (``'em'``, ``'en'``, ``'ch'``) to
        a sentence-transformer model name.

        ``allState`` maps a domain name to a dict with the keys ``cstate``,
        ``pstate``, ``dir``, ``json``, ``chroma``, ``model`` and ``chunk``.
        It holds ``insts``, ``en``, ``ch`` and twelve PDF domains
        ``pdf_0em`` … ``pdf_11em``. Every call builds fresh dictionaries.
    """
    def _entry(src_dir, status_json, chroma_dir, model_key, chunk):
        # One state record; cstate/pstate start out as separate empty dicts.
        return {
            'cstate': {},
            'pstate': {},
            'dir': src_dir,
            'json': status_json,
            'chroma': chroma_dir,
            'model': model_key,
            'chunk': chunk,
        }

    models = {
        'em': "all-MiniLM-L6-v2",
        'en': "all-mpnet-base-v2",
        'ch': "shibing624/text2vec-base-chinese-sentence",
    }
    # 'chunk' marks domains whose pages get cut into smaller pieces: big PDF
    # pages into 1000-byte chunks, and Chinese pages into smaller chunks.
    allState = {
        'insts': _entry('insts', 'filestatus.insts.json', 'chroma_db_insts', 'en', 0),
        'en': _entry('src_en', 'filestatus.english.json', 'chroma_db_en', 'en', 0),
        'ch': _entry('src_ch', 'filestatus.chinese.json', 'chroma_db_ch', 'ch', 1),
    }
    allState.update(
        (
            f"pdf_{ind}em",
            _entry(
                f"pdf_sub{ind}",
                f"filestatus.pdf_{ind}em.json",
                f"chroma_db_pdf_{ind}em",
                'em',
                1,
            ),
        )
        for ind in range(12)
    )
    return models, allState
195
def formatPrompt(message, history):
    """Build the query-rewriting prompt from the previous exchange.

    Args:
        message: The current user query.
        history: Two-item sequence ``[previous_query, previous_answer]``.

    Returns:
        str: ``message`` unchanged when ``history[0]`` is empty; otherwise a
        prompt listing the previous query, the previous answer and the
        current query, one per line, ending in ``"New query:"``.
    """
    if not history[0]:
        return message
    # Fixes: the answer line used to read the misspelled name ``histroy``
    # (NameError), and the fields had no separators so they ran together.
    return (
        "Create a new query based on previous query/answer paire and current query:\n"
        f"Previous query: {history[0]}\n"
        f"Previous answer: {history[1]}\n"
        f"Current query: {message}\n"
        "New query:"
    )
204
+
205
def llmNewQuery(prompt, history):
    """Ask the LLM to rewrite ``prompt`` in light of the previous exchange.

    Args:
        prompt: The current user query.
        history: Passed straight to ``formatPrompt``.

    Returns:
        Whatever ``llmGenerate`` returns for the formatted prompt.
    """
    return llmGenerate(formatPrompt(prompt, history))
209
+
210
def llmGenerate(prompt, temperature=0.001, max_new_tokens=2048, top_p=0.95, repetition_penalty=1.0):
    """Generate text for ``prompt`` with Mistral-7B-Instruct-v0.2.

    The request goes through ``InferenceClient.text_generation`` in
    streaming mode, and the streamed token texts are concatenated.

    Args:
        prompt: Full prompt text.
        temperature: Forwarded as given (it is not clamped).
        max_new_tokens: Forwarded as given.
        top_p: Converted with ``float`` before being forwarded.
        repetition_penalty: Forwarded as given.

    Returns:
        str: Concatenation of ``token.text`` for every streamed response.
    """
    sampling = {
        'temperature': temperature,
        'max_new_tokens': max_new_tokens,
        'top_p': float(top_p),
        'repetition_penalty': repetition_penalty,
        'do_sample': True,
        'seed': 42,
    }
    client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.2")
    chunks = client.text_generation(
        prompt,
        stream=True,
        details=True,
        return_full_text=False,
        **sampling,
    )
    return "".join(chunk.token.text for chunk in chunks)
233
+
234
+
235
def thoseRemove():
    """Return a fresh list of vendor-related strings.

    NOTE(review): judging by the name and the regex-style ``?`` in one
    entry, these are presumably patterns to strip from text; the consumer
    is not visible here, so confirm against the caller.

    Returns:
        list[str]: A new list on every call.
    """
    return [
        "www.synopsys.com",
        "sy ?nopsys",
        "cadence",
        "mentor",
        "solvnetplus",
        "solvnet",
    ]
238
+
239
def GetContent(file):
    """Read ``file`` in text mode and return its entire content.

    Args:
        file: Path handed to ``open`` with default mode and encoding.

    Returns:
        str: The full text of the file.
    """
    with open(file) as handle:
        return handle.read()
244
+
245
def scriptExamples():
    """Return the Perl ECO script examples used as LLM context.

    The text holds nine commented examples (manual ECO, automatic ECO,
    metal-only ECO and variations of it) and is returned verbatim.

    Returns:
        str: The example scripts as one multi-line string.
    """
    # NOTE(review): the leading whitespace of the lines inside this literal
    # could not be recovered from the source view; confirm against the
    # original file if exact spacing matters.
    return """
#The first ECO scipt example for manual ECO:
use strict;
setup_eco("eco_example");
read_library("tsmc.5nm.lib");
read_design("-imp", "implementation.gv");
set_top("topmod");
change_pin("u_abc/state_reg_0_/D", "INVX1", "", "-");
change_pin("u_abc/state_reg_1_/D", "INVX1", "", "-");
change_pin("u_abc/state_reg_2_/D", "INVX1", "", "-");
report_eco(); # ECO report
check_design();
write_verilog("eco_verilog.v");# Write out ECO result in Verilog
#End of the manual ECO script example

#The second ECO script example for automatic ECO:
use strict;
setup_eco("eco_example");# Setup ECO name
read_library("tsmc.5nm.lib");# Read in standard library
# SVF files are optional, best to be used when the design involves multibit flops
#read_svf("-ref", "reference.svf.txt");
#read_svf("-imp", "implementation.svf.txt");
read_design("-ref", "reference.gv");
read_design("-imp", "implementation.gv");
set_top("topmod");# Set the top module
# Preserve DFT Test Logic
set_ignore_output("scan_out*");
set_pin_constant("scan_enable", 0);
set_pin_constant("scan_mode", 0);
fix_design();
report_eco(); # ECO report
check_design();
write_verilog("eco_verilog.v");# Write out ECO result in Verilog
run_lec(); # Run GOF LEC to generate Formality help files
#End of automatic ECO script example


#The third ECO script example is for automatic metal only ECO:
use strict;
setup_eco("eco_example");# Setup ECO name
read_library("tsmc.5nm.lib");# Read in standard library
# SVF files are optional, best to be used when the design involves multibit flops
#read_svf("-ref", "reference.svf.txt");
#read_svf("-imp", "implementation.svf.txt");
read_design("-ref", "reference.gv");# Read in Reference Netlist
read_design("-imp", "implementation.gv");
set_top("topmod");# Set the top module
set_ignore_output("scan_out*");
set_pin_constant("scan_enable", 0);
set_pin_constant("scan_mode", 0);
read_lef("tsmc.lef"); # Read LEF
read_def("topmod.def"); # Read Design Exchange Format file
fix_design(); # Must run before get_spare_cells and map_spare_cells
get_spare_cells("*/*_SPARE*");
map_spare_cells();
report_eco(); # ECO report
check_design();# Check if the ECO causes any issue, like floating
write_verilog("eco_verilog.v");# Write out ECO result in Verilog
write_perl("eco_result.pl");# Write out result in Perl script
run_lec(); # Run GOF LEC to generate Formality help files
#End of automatic ECO script example

#The four ECO script example is the same as the third ECO script, except fix_design
# list_file option to load in the ECO points list file converted from RTL-to-RTL LEC result
fix_design("-list_file", "the_eco_points.txt");

#The 5th ECO script example is the same as the 3rd ECO script, except fix_design
# Enable flatten mode ECO. The default mode is hierarchical. The flatten mode is for small fix but the changes go across
# module boundaries
fix_design("-flatten");

#The 6th ECO script is similar to the third ECO script, but it dumps formality help file after LEC
run_lec(); # Run GOF LEC to generate Formality help files
write_compare_points("compare_points.report");
write_formality_help_files("fm_dir/formality_help"); # formality_help files are generated in fm_dir folder

#The 7th ECO script is similar to the third ECO script, but it uses gate array spare cells
fix_design(); # Must run before get_spare_cells and map_spare_cells
# Enable Gate Array Spare Cells Metal Only ECO Flow, map_spare_cells will map to Gate Array Cells only
get_spare_cells("-gate_array", "G*", "-gate_array_filler", "GFILL*|GDCAP*");
map_spare_cells();

#The 8th ECO script is similar to the third ECO script, but it uses only deleted gates or freed up gates in ECO as spare cells
fix_design(); # Must run before get_spare_cells and map_spare_cells
get_spare_cells("-addfreed");
map_spare_cells();

#The 9th ECO script is manual ECO, find all memory hierarchically and tie the pin TEST_SHIFT of memory to net "TEST_EN"
use strict;
setup_eco("eco_example");
read_library("tsmc.3nm.lib");
read_design("-imp", "from_backend.gv");
set_top("topmod");
# Get all memories hierarchically, instance naming, "U_HMEM*"
my @mems = get_cells("-hier", "U_HMEM*");
foreach my $mem (@mems){
change_pin("$mem/TEST_SHIFT", "TEST_EN");
}
report_eco(); # ECO report
check_design();
write_verilog("mem_eco.v");

"""