Spaces:

gofeco
/

support

Sleeping

App Files Files Community

gofeco committed on Mar 31

Commit

2ce8f2c

•

1 Parent(s): b7c38f0

Upload nand.py

Browse files

Files changed (1) hide show

nand.py +349 -0

nand.py ADDED Viewed

	@@ -0,0 +1,349 @@

+from huggingface_hub import InferenceClient
+from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
+from langchain_community.vectorstores import Chroma
+from transformers import pipeline
+from sentence_transformers.cross_encoder import CrossEncoder
+import re
+def setupDB(domain, hasLLM):
+    history = []
+    history.append("")
+    history.append("")
+    crossmodel = CrossEncoder("cross-encoder/stsb-distilroberta-base")
+    models,allState = nandState()
+    support_db = nandGetChroma(domain)
+    insts_db = nandGetChroma("insts")
+    pdf_dbs = []
+    if domain == 'en':
+        pdfs = [] #"pdf_0em", "pdf_1em", "pdf_2em", "pdf_3em","pdf_4em"]
+        for onepdf in pdfs:
+            pdfdb =  nandGetChroma(onepdf)
+            pdf_dbs.append(pdfdb)
+    para = {}
+    para['history'] = history
+    para['disnum'] = 10
+    para['domain'] = domain
+    para['crossmodel'] = crossmodel
+    para['insts_db'] = insts_db
+    para['support_db'] = support_db
+    para['pdf_dbs'] = pdf_dbs
+    para['hasLLM'] = hasLLM
+    return para
+def remapScore(domain, inscore):
+    if domain == 'ch':
+        xin = 1 - inscore
+        a = -0.2
+        b = 1.2
+        y = a * xin * xin + b * xin
+        return int(y * 100)
+    else:
+        xin = 1 - inscore
+        a = -1.2
+        b = 2.2
+        y = a * xin * xin + b * xin
+        return int(y * 100)
+def process_query(iniquery, para):
+    query = re.sub("<br>", "", iniquery)
+    ch2en, query = toEn(query)
+    if ch2en:
+        print(f"Received from connected users : {query}")
+    else:
+        print(f"Received from connected users : {query}", end='')
+    disnum = para['disnum']
+    domain = para['domain']
+    history = para['history']
+    crossmodel = para['crossmodel']
+    insts_db = para['insts_db']
+    support_db = para['support_db']
+    pdf_dbs = para['pdf_dbs']
+    hasLLM = para['hasLLM']
+    ret = ""
+    needScriptScores = crossmodel.predict([["write a perl ECO script", query]])
+    print(f"THE QUERY SCORE for creating eco script: score={needScriptScores[0]}")
+    allapis = []
+    threshold = 0.45
+    if needScriptScores[0] > threshold:
+        print(f"THE QUERY REQUIRES CREATING AN ECO SCRIPT score={needScriptScores[0]} > {threshold}")
+        retinsts = insts_db.similarity_search_with_score(query, k=10)
+        accu = 0
+        for inst in retinsts:
+            instdoc = inst[0]
+            instscore = inst[1]
+            instname = instdoc.metadata['source']
+            otherfile = re.sub("^insts", "src_en", instname)
+            otherfile = re.sub("\.\d+", "", otherfile)
+            if not otherfile in allapis:
+                allapis.append(otherfile)
+                apisize = os.path.getsize(otherfile)
+                accu += apisize
+                print(f"INST: {instname} SCORE: {instscore} API-size: {apisize} Accu: {accu}")
+    results = []
+    docs = support_db.similarity_search_with_score(query, k=8)
+    for doc in docs:
+        results.append([doc[0], doc[1]])
+    for onepdfdb in pdf_dbs:
+        pdocs = onepdfdb.similarity_search_with_score(query, k=8)
+        for doc in pdocs:
+            results.append([doc[0], doc[1]+0.2])
+    results.sort(key=lambda x: x[1])
+    docnum = len(results)
+    index = 1
+    for ii in range(docnum):
+        doc = results[ii][0]
+        source = doc.metadata['source']
+        path = source #source.replace("\\", "/")
+        #print(f"path={path}")
+        if path in allapis:
+            print(f"dont use path={path}, it's in instruction list")
+            continue
+        prefix = "Help:"
+        if re.search("api\.", source):
+            prefix = "API:"
+        elif re.search("man\.", source):
+            prefix = "Manual:"
+        elif re.search("\.pdf$", source):
+            prefix = "PDF:";
+        score = remapScore(domain, results[ii][1])
+        retcont = doc.page_content
+        if re.search("\.pdf$", source):
+            page = doc.metadata['page'] + 1
+            subpage = doc.metadata['subpage']
+            retcont += f"\n<a target='_blank' href='/AI/{path}#page={page}'>PDF{page} {subpage}</a>\n"
+        ret += f"Return {index} ({score}) {prefix} {retcont}\n"
+        if len(ret) > 6000:
+            break
+        index += 1
+        if index > disnum:
+            break
+    if hasLLM:
+        context = "Context information is below\n---------------------\n"
+        if len(allapis):
+            context += scriptExamples()
+            for oneapi in allapis:
+                cont = GetContent(oneapi)
+                cont = re.sub("</h3>", " API Detail:", cont)
+                cont = re.sub('<.*?>', '', cont)
+                cont = re.sub('Examples:.*', '', cont, flags=re.DOTALL)
+                context += cont
+        context += ret
+        prompt = f"{context}\n"
+        prompt += "------------------------------------------\n"
+        if len(allapis):
+            prompt += "Given the context information and not prior knowledge, creat a Perl ECO script by following the format and sequence in the script examples provided above.\n"
+            #prompt += "1. Following the format in the script examples provided above.\n"
+            #prompt += "2. Following the API sequence in the script examples above, for instance, APIs get_spare_cells and map_spare_cells should be after fix_design.\n"
+        else:
+            prompt += "Given the context information and not prior knowledge, answer the query.\n"
+        prompt += f"Query: {query}\n"
+        llmout = llmGenerate(prompt)
+        history[0] = query
+        history[1] = llmout
+        #return llmout
+        outlen = len(llmout)
+        prolen = len(prompt)
+        print(f"Prompt len: {prolen} LLMOUT len: {outlen}")
+        allret = "LLM_OUTPUT_START:"+llmout+"\nEND OF LLM OUTPUT\n"+prompt
+        return allret
+    return ret
+def toEn(intxt):
+    pattern = re.compile(r'[\u4e00-\u9fff]+')
+    if pattern.search(intxt):
+        translator = pipeline(task="translation", model="Helsinki-NLP/opus-mt-zh-en")
+        ini_text = translator(intxt, max_length=500)[0]['translation_text']
+        out_text = re.sub("ECO foot", "ECO Script", ini_text)
+        out_text = re.sub("web-based", "netlist", out_text)
+        out_text = re.sub(r"\bweb\b", "netlist", out_text)
+        out_text = re.sub(r"\bwebsheet\b", "netlist", out_text)
+        out_text = re.sub(r"\bweblists?\b", "netlist", out_text)
+        print(f"AFTER RESULT: {out_text}")
+        return 1, out_text
+    return 0, intxt
+def nandGetChroma(domain):
+    models,allState = nandState()
+    chdb = allState[domain]['chroma']
+    print(f"domain: {domain} has chroma dir {chdb}")
+    model_ind = allState[domain]['model']
+    model_name = models[model_ind]
+    embedding_function = SentenceTransformerEmbeddings(model_name=model_name)
+    chroma_db = Chroma(persist_directory=chdb, embedding_function=embedding_function)
+    return chroma_db
+def nandState():
+    models = {'em': "all-MiniLM-L6-v2",
+              'en': "all-mpnet-base-v2",
+              'ch': "shibing624/text2vec-base-chinese-sentence"}
+    # chunk is to cut the big PDF page to smaller, 1000byte chunks, and chinese page into smaller chunks
+    allState = {'insts':{'cstate':{},'pstate':{},'dir':'insts','json':'filestatus.insts.json','chroma':'chroma_db_insts','model':'en','chunk':0},
+                'en':{'cstate':{},'pstate':{},'dir':'src_en','json':'filestatus.english.json','chroma':'chroma_db_en','model':'en','chunk':0},
+                'ch':{'cstate':{},'pstate':{},'dir':'src_ch','json':'filestatus.chinese.json','chroma':'chroma_db_ch','model':'ch','chunk':1}
+                }
+    for ind in range(12):
+        name = f"pdf_{ind}em"
+        allState[name] = {'cstate':{},'pstate':{},'dir':f"pdf_sub{ind}",'json':f"filestatus.{name}.json",'chroma':f"chroma_db_{name}",'model':'em','chunk':1}
+    return models, allState
+def formatPrompt(message, history):
+    if history[0]:
+        prompt = "Create a new query based on previous query/answer paire and current query:\n"
+        prompt += f"Previous query: {history[0]}"
+        prompt += f"Previous answer: {histroy[1]}"
+        prompt += f"Current query: {message}"
+        prompt += "New query:"
+        return prompt
+    return message
+def llmNewQuery(prompt, history):
+    newpend = formatPrompt(prompt, history)
+    newquery = llmGenerate(newpend)
+    return newquery
+def llmGenerate(prompt, temperature=0.001, max_new_tokens=2048, top_p=0.95, repetition_penalty=1.0):
+    #temperature = float(temperature)
+    #if temperature < 1e-2:
+    #    temperature = 1e-2
+    top_p = float(top_p)
+    generate_kwargs = dict(
+        temperature=temperature,
+        max_new_tokens=max_new_tokens,
+        top_p=top_p,
+        repetition_penalty=repetition_penalty,
+        do_sample=True,
+        seed=42,
+    )
+    llmclient = InferenceClient("mistralai/Mistral-7B-Instruct-v0.2")
+    stream = llmclient.text_generation(prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
+    output = ""
+    for response in stream:
+        output += response.token.text
+        #yield output
+    return output
+def thoseRemove():
+    those = ["www.synopsys.com", "sy ?nopsys", "cadence", "mentor", "solvnetplus", "solvnet"]
+    return those
+def GetContent(file):
+    fcont = ""
+    with open(file) as f:
+        fcont = f.read()
+    return fcont
+def scriptExamples():
+    """Return the example Perl ECO scripts used as LLM context.
+
+    The text is one fixed multi-line string holding nine commented script
+    examples; process_query() prepends it to the prompt context when the
+    query asks for an ECO script.
+    """
+    exp = """
+#The first ECO scipt example for manual ECO:
+use strict;
+setup_eco("eco_example");
+read_library("tsmc.5nm.lib");
+read_design("-imp", "implementation.gv");
+set_top("topmod");
+change_pin("u_abc/state_reg_0_/D", "INVX1", "", "-");
+change_pin("u_abc/state_reg_1_/D", "INVX1", "", "-");
+change_pin("u_abc/state_reg_2_/D", "INVX1", "", "-");
+report_eco(); # ECO report
+check_design();
+write_verilog("eco_verilog.v");# Write out ECO result in Verilog
+#End of the manual ECO script example
+#The second ECO script example for automatic ECO:
+use strict;
+setup_eco("eco_example");# Setup ECO name
+read_library("tsmc.5nm.lib");# Read in standard library
+# SVF files are optional, best to be used when the design involves multibit flops
+#read_svf("-ref", "reference.svf.txt");
+#read_svf("-imp", "implementation.svf.txt");
+read_design("-ref", "reference.gv");
+read_design("-imp", "implementation.gv");
+set_top("topmod");# Set the top module
+# Preserve DFT Test Logic
+set_ignore_output("scan_out*");
+set_pin_constant("scan_enable", 0);
+set_pin_constant("scan_mode", 0);
+fix_design();
+report_eco(); # ECO report
+check_design();
+write_verilog("eco_verilog.v");# Write out ECO result in Verilog
+run_lec(); # Run GOF LEC to generate Formality help files
+#End of automatic ECO script example
+#The third ECO script example is for automatic metal only ECO:
+use strict;
+setup_eco("eco_example");# Setup ECO name
+read_library("tsmc.5nm.lib");# Read in standard library
+# SVF files are optional, best to be used when the design involves multibit flops
+#read_svf("-ref", "reference.svf.txt");
+#read_svf("-imp", "implementation.svf.txt");
+read_design("-ref", "reference.gv");# Read in Reference Netlist
+read_design("-imp", "implementation.gv");
+set_top("topmod");# Set the top module
+set_ignore_output("scan_out*");
+set_pin_constant("scan_enable", 0);
+set_pin_constant("scan_mode", 0);
+read_lef("tsmc.lef"); # Read LEF
+read_def("topmod.def"); # Read Design Exchange Format file
+fix_design(); # Must run before get_spare_cells and map_spare_cells
+get_spare_cells("*/*_SPARE*");
+map_spare_cells();
+report_eco(); # ECO report
+check_design();# Check if the ECO causes any issue, like floating
+write_verilog("eco_verilog.v");# Write out ECO result in Verilog
+write_perl("eco_result.pl");# Write out result in Perl script
+run_lec(); # Run GOF LEC to generate Formality help files
+#End of automatic ECO script example
+#The four ECO script example is the same as the third ECO script, except fix_design
+# list_file option to load in the ECO points list file converted from RTL-to-RTL LEC result
+fix_design("-list_file", "the_eco_points.txt");
+#The 5th ECO script example is the same as the 3rd ECO script, except fix_design
+# Enable flatten mode ECO. The default mode is hierarchical. The flatten mode is for small fix but the changes go across
+# module boundaries
+fix_design("-flatten");
+#The 6th ECO script is similar to the third ECO script, but it dumps formality help file after LEC
+run_lec(); # Run GOF LEC to generate Formality help files
+write_compare_points("compare_points.report");
+write_formality_help_files("fm_dir/formality_help"); # formality_help files are generated in fm_dir folder
+#The 7th ECO script is similar to the third ECO script, but it uses gate array spare cells
+fix_design(); # Must run before get_spare_cells and map_spare_cells
+# Enable Gate Array Spare Cells Metal Only ECO Flow, map_spare_cells will map to Gate Array Cells only
+get_spare_cells("-gate_array", "G*", "-gate_array_filler", "GFILL*|GDCAP*");
+map_spare_cells();
+#The 8th ECO script is similar to the third ECO script, but it uses only deleted gates or freed up gates in ECO as spare cells
+fix_design(); # Must run before get_spare_cells and map_spare_cells
+get_spare_cells("-addfreed");
+map_spare_cells();
+#The 9th ECO script is manual ECO, find all memory hierarchically and tie the pin TEST_SHIFT of memory to net "TEST_EN"
+use strict;
+setup_eco("eco_example");
+read_library("tsmc.3nm.lib");
+read_design("-imp", "from_backend.gv");
+set_top("topmod");
+# Get all memories hierarchically, instance naming, "U_HMEM*"
+my @mems = get_cells("-hier", "U_HMEM*");
+foreach my $mem (@mems){
+    change_pin("$mem/TEST_SHIFT", "TEST_EN");
+}
+report_eco(); # ECO report
+check_design();
+write_verilog("mem_eco.v");
+    """
+    return exp