
Adding ChatGPT to Elasticsearch NLP & an n-gram search box to Streamlit (failed)

dayeonsheep 2024. 2. 1. 01:43

 

The original code, which implements ChatGPT answers grafted onto Elasticsearch NLP search in Python ↓

# Install dependencies: openai, streamlit, localtunnel, elasticsearch, googletrans
!pip install openai==0.28
!pip install streamlit
!npm install localtunnel
!pip install -U typing_extensions
!pip install elasticsearch
!pip install googletrans==4.0.0-rc1
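A note on the openai==0.28 pin: the app below uses the legacy openai.ChatCompletion interface, which was removed in openai 1.x. If a newer openai version ends up installed, the chat call has to be rewritten roughly like this (a sketch only, not part of the original app; the key is a placeholder):

# Sketch: equivalent chat call with the openai>=1.0 client
from openai import OpenAI

client = OpenAI(api_key="YOUR_API_KEY")  # placeholder key

def chat_gpt_v1(prompt, model="gpt-3.5-turbo"):
    # chat.completions.create replaces the removed openai.ChatCompletion.create
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ],
    )
    return response.choices[0].message.content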

 

# ↓↓ After writing the full code, uncomment the line below and run ↓↓

%%writefile elastic_gpt_app.py

# ↑↑ After writing the full code, uncomment the line above and run ↑↑

import os
import streamlit as st
import openai
from elasticsearch import Elasticsearch

######################## TO DO ########################

# Enter your OpenAI API key
openai_api = ''

#######################################################

openai.api_key = openai_api
model = "gpt-3.5-turbo-0301"

# Connect to Elastic Cloud (http_auth is deprecated in newer elasticsearch-py releases in favor of basic_auth)
def es_connect(cid, user, passwd):
    es = Elasticsearch(cloud_id=cid, http_auth=(user, passwd))
    return es

# Elasticsearch Search
def search(query_text):
######################## TO DO ########################
# Enter your ES Cloud ID, username, and password
    cid = ''
    cu = 'elastic'
    cp = 'password'
#######################################################
    es = es_connect(cid, cu, cp)

    # Elasticsearch query (BM25) and kNN configuration for hybrid search
    query = {
        "bool": {
            "must": [{
                "match": {
                    "title": {
                        "query": query_text,
                        "boost": 1
                    }
                }
            }],
            "filter": [{
                "exists": {
                    "field": "title"
                }
            }]
        }
    }

    knn = {
        "field": "title-vector",
        "k": 1,
        "num_candidates": 20,
        "query_vector_builder": {
            "text_embedding": {
                "model_id": "sentence-transformers__all-distilroberta-v1",
                "model_text": query_text
            }
        },
        "boost": 24
    }

    fields = ["title", "body_content", "url"]
    index = 'search-elastic-docs'
    resp = es.search(index=index,
                     query=query,
                     knn=knn,
                     fields=fields,
                     size=1,
                     source=False)

    body = resp['hits']['hits'][0]['fields']['body_content'][0]
    url = resp['hits']['hits'][0]['fields']['url'][0]

    return body, url

def truncate_text(text, max_tokens):
    tokens = text.split()
    if len(tokens) <= max_tokens:
        return text

    return ' '.join(tokens[:max_tokens])

# Generate the ChatGPT answer
def chat_gpt(prompt, model="gpt-3.5-turbo", max_tokens=1024, max_context_tokens=4000, safety_margin=5):
    # Truncate the prompt content to fit within the model's context length
    truncated_prompt = truncate_text(prompt, max_context_tokens - max_tokens - safety_margin)

    response = openai.ChatCompletion.create(model=model,
                                            messages=[{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": truncated_prompt}])

    return response["choices"][0]["message"]["content"]

st.title("ElasticDocs GPT")


from googletrans import Translator

def translate_to_korean(text):
    translator = Translator()
    translated_text = translator.translate(text, dest='ko').text
    return translated_text

# Input form
with st.form("chat_form"):
    query = st.text_input("You: ")
    submit_button = st.form_submit_button("Send")

# Display the answer
negResponse = "NO"
if submit_button:
    resp, url = search(query)
    prompt = f"Answer this question in korean: {query}\nUsing only the information from this Elastic Doc: {resp}\nIf the answer is not contained in the supplied doc reply '{negResponse}' and nothing else"
    answer = chat_gpt(prompt)

    translated_answer = translate_to_korean(answer)

    # Check the untranslated answer; translating to Korean can change the literal "NO"
    if negResponse in answer:
        st.write(f"ChatGPT: {translated_answer.strip()}")
    else:
        st.write(f"ChatGPT: {translated_answer.strip()}\n\nDocs: {url}")

 

- View it in the Streamlit web UI

# Run Streamlit in the background
!streamlit run /content/elastic_gpt_app.py &>/content/logs.txt &
# Print the public IP (localtunnel's access page asks for this IP as the tunnel password)
import urllib
print("IP Endpoint:",urllib.request.urlopen('https://ipv4.icanhazip.com').read().decode('utf8').strip("\n"))
# Get a temporary public URL for port 8501
!npx localtunnel --port 8501
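For reference, the hybrid query in search() only works if the search-elastic-docs index already has a title-vector dense_vector field and the sentence-transformers__all-distilroberta-v1 model is deployed in the cluster (e.g. imported with Eland). A minimal sketch of the assumed mapping, assuming elasticsearch-py 8.x and the model's 768-dimensional embeddings:

# Sketch only: the index mapping the hybrid search above presupposes (credentials are placeholders)
from elasticsearch import Elasticsearch

es = Elasticsearch(cloud_id="YOUR_CLOUD_ID", basic_auth=("elastic", "password"))

es.indices.create(
    index="search-elastic-docs",
    mappings={
        "properties": {
            "title": {"type": "text"},
            "body_content": {"type": "text"},
            "url": {"type": "keyword"},
            # all-distilroberta-v1 outputs 768-dimensional vectors
            "title-vector": {"type": "dense_vector", "dims": 768, "index": True, "similarity": "cosine"},
        }
    },
)

Populating title-vector would additionally need an ingest pipeline (or client-side embedding) running the deployed model; that part is omitted here.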

 

 

-> Next, try modifying the code so the search box that returns ChatGPT answers works as an n-gram search box instead

I rewrote just the middle part to add the n-gram query

%%writefile elastic_gpt_app.py

# ↑↑ After writing the full code, uncomment the line above and run ↑↑

import os
import streamlit as st
import openai  # this import was missing at first (see the note at the end)
from elasticsearch import Elasticsearch

# Enter your OpenAI API key
openai_api = ''

openai.api_key = openai_api
model = "gpt-3.5-turbo-0301"

def es_connect(cid, user, passwd):
    es = Elasticsearch(cloud_id=cid, http_auth=(user, passwd))
    return es

# Elasticsearch Search with n-gram
def search_ngram(query_text):
    ######################## TO DO ########################
    # Enter your ES Cloud ID, username, and password
    cid = ''
    cu = 'elastic'
    cp = 'password'
    #######################################################
    es = es_connect(cid, cu, cp)

    # Elasticsearch n-gram query
    query = {
        "match": {
            "title": {
                "query": query_text,
                "operator": "and",
                "analyzer": "my_analyzer"  # Use the custom n-gram analyzer
            }
        }
    }

    fields = ["title", "body_content", "url"]
    index = 'search-elastic-docs'
    resp = es.search(index=index,
                     query=query,
                     fields=fields,
                     size=1,
                     source=False)

    if resp['hits']['hits']:
        body = resp['hits']['hits'][0]['fields']['body_content'][0]
        url = resp['hits']['hits'][0]['fields']['url'][0]
        return body, url
    else:
        return None, None

def truncate_text(text, max_tokens):
    tokens = text.split()
    if len(tokens) <= max_tokens:
        return text

    return ' '.join(tokens[:max_tokens])

# Generate the ChatGPT answer
def chat_gpt(prompt, model="gpt-3.5-turbo", max_tokens=1024, max_context_tokens=4000, safety_margin=5):
    # Truncate the prompt content to fit within the model's context length
    truncated_prompt = truncate_text(prompt, max_context_tokens - max_tokens - safety_margin)

    response = openai.ChatCompletion.create(model=model,
                                            messages=[{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": truncated_prompt}])

    return response["choices"][0]["message"]["content"]

st.title("ElasticDocs GPT")

from googletrans import Translator

def translate_to_korean(text):
    translator = Translator()
    translated_text = translator.translate(text, dest='ko').text
    return translated_text

# Input form
with st.form("chat_form"):
    query = st.text_input("You: ")
    submit_button = st.form_submit_button("Send")

# Display the answer
negResponse = "NO"
if submit_button:
    resp, url = search_ngram(query)

    # This version only shows the raw search hit; chat_gpt() is defined above but never called
    if resp is not None:
        st.write(f"Search Result: {resp}\n\nDocs: {url}")
    else:
        st.write("No matching documents found.")

Yeah, it doesn't work...

 

I added import openai, but it still doesn't run...

The errors start right from pip install...
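For what it's worth, even with the environment sorted out, the n-gram version would most likely still fail at query time: the match query names the analyzer my_analyzer, and Elasticsearch rejects a search that references an analyzer the target index doesn't define. Partial matching also only works if title itself was indexed with an n-gram analyzer. A sketch of index settings that would define one (the analyzer and field names mirror the code above; everything else, including the index name, is an assumption):

# Sketch: an index whose title field uses a custom n-gram analyzer called my_analyzer
from elasticsearch import Elasticsearch

es = Elasticsearch(cloud_id="YOUR_CLOUD_ID", basic_auth=("elastic", "password"))

es.indices.create(
    index="search-elastic-docs-ngram",  # hypothetical index for the n-gram experiment
    settings={
        "analysis": {
            "tokenizer": {
                # 2- to 3-character grams; max_gram - min_gram must stay within max_ngram_diff (default 1)
                "my_ngram_tokenizer": {"type": "ngram", "min_gram": 2, "max_gram": 3}
            },
            "analyzer": {
                "my_analyzer": {
                    "type": "custom",
                    "tokenizer": "my_ngram_tokenizer",
                    "filter": ["lowercase"],
                }
            },
        }
    },
    mappings={
        # title is analyzed with the same n-gram analyzer at index time so partial matches can hit
        "properties": {"title": {"type": "text", "analyzer": "my_analyzer"}}
    },
)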