Skip to content

Commit

Permalink
process guess
Browse files Browse the repository at this point in the history
  • Loading branch information
surisdi committed Dec 22, 2023
1 parent 12782b6 commit ac2fa26
Show file tree
Hide file tree
Showing 4 changed files with 191 additions and 26 deletions.
1 change: 1 addition & 0 deletions configs/base_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ best_match_model: xvlm # Which model to use for bes
gpt3: # GPT-3 configuration
n_votes: 1 # Number of tries to use for GPT-3. Use with temperature > 0
qa_prompt: ./prompts/gpt3/gpt3_qa.txt
guess_prompt: ./prompts/gpt3/gpt3_process_guess.txt
temperature: 0. # Temperature for GPT-3. Almost deterministic if 0
model: text-davinci-003 # See openai.Model.list() for available models

Expand Down
4 changes: 4 additions & 0 deletions image_patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,10 @@ def llm_query(query, context=None, long_answer=True, queues=None):
return forward(model_name='gpt3_qa', prompt=[query, context], queues=queues)


def process_guesses(prompt, guess1=None, guess2=None, queues=None):
    """
    Forward a question together with its candidate guesses to the 'gpt3_guess' process.

    The question and the two guesses are bundled, in that order, into a single three-element list that is
    passed as the `prompt` argument of `forward`; `queues` is handed through unchanged.

    Returns
    -------
    Whatever `forward` returns for the 'gpt3_guess' process.
    """
    guess_request = [prompt, guess1, guess2]
    return forward(model_name='gpt3_guess', prompt=guess_request, queues=queues)


def coerce_to_numeric(string, no_string=False):
"""
This function takes a string as input and returns a numeric value after removing any non-numeric characters.
Expand Down
121 changes: 121 additions & 0 deletions prompts/gpt3/gpt3_process_guess.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
Please answer the following questions using the given guesses.
If a unique answer cannot be determined, choose only one of the possible answers.
Aim to reply in ONE word (at MOST 2).

Question: What kind of flowers are these?
Guess 1: these flowers are purple, so lavender, lilac, iris, and hyacinth
Guess 2: purple flowers
Answer: lilac

Question: What do these people on the bikes normally write and give out?
Guess 1: the people on bikes are police, so Tickets
Guess 2: tickets
Answer: tickets

Question: What kind of cold meet is this?
Guess 1: what kind of meat is this is beef, so roast beef
Guess 2: beef
Answer: beef

Question: Can you guess the place shown in this picture?
Guess 1: the place is tourist attraction, so the Eiffel Tower in Paris, France
Guess 2: big ben
Answer: big ben

Question: When was this type of vehicle with two equal sized wheels invented?
Guess 1: the vehicle is a bicycle, so 19th century
Guess 2: 1819
Answer: 1800s

Question: What is the flavor of the pink topping on this dessert?
Guess 1: the topping is whipped cream, so strawberry, vanilla, chocolate, and raspberry
Guess 2: strawberry
Answer: strawberry

Question: How are these festive lights held in place?
Guess 1: these festive lights are christmas lights, so with hooks clips
Guess 2: string
Answer: string

Question: Who is famous for allegedly doing this in a lightning storm?
Guess 1: what is being done is flying a kite, so Benjamin Franklin
Guess 2: Charles Manson
Answer: Benjamin Franklin

Question: What is the object atop the skier's head used for?
Guess 1: the object atop the skier's head is helmet, so protection from head injuries
Guess 2: sunglasses
Answer: protection

Question: What rank is the man on the right?
Guess 1: who is the man on the right is sailor, so seaman
Guess 2: captain
Answer: captain

Question: Chemically what kind of water is in the picture?
Guess 1: the water in the picture is waves, so salt water
Guess 2: salt water
Answer: salt

Question: Is the material tweed or canvas?
Guess 1: the material is fabric, so fabric
Guess 2: canvas
Answer: canvas

Question: Which type of meat are in the photo?
Guess 1: the meat in the photo is sausage, so pork
Guess 2: hot dogs
Answer: hotdogs

Question: What sort of predator might there be in an area like this?
Guess 1: this area is mountains, so predators like wolves fox
Guess 2: shark
Answer: shark

Question: Can you name a sport this person could be a part of?
Guess 1: this person is a racer, so racing such as auto
Guess 2: motorcycle racing
Answer: racing

Question: Who makes the yellow top worn in this photograph?
Guess 1: the top is red, so brand is unknown
Guess 2: Burton
Answer: Burton

Question: Is the athlete right or left handed?
Guess 1: what is the athlete doing is playing baseball, so unclear
Guess 2: right handed
Answer: right handed

Question: Is this food high or low on fat?
Guess 1: what kind of food is this is sandwich, so depends on ingredients
Guess 2: high
Answer: high

Question: What wood are those cabinets made of?
Guess 1: what kind of cabinets are these is kitchen cabinets, so typically wood such as oak
Guess 2: maple
Answer: maple

Question: Which objects shown are typically associated with small children?
Guess 1: what objects are shown are stuffed animals, so toys
Guess 2: teddy bears
Answer: teddy bears

Question: What small appliance is that stuffed animal inside?
Guess 1: the stuffed animal is a teddy bear, so vacuum cleaner
Guess 2: microwave
Answer: microwave

Question: What is this made with?
Guess 1: what is this is muffin, so flour sugar eggs
Guess 2: oats
Answer: flour

Question: What is the position name of the player squatting down?
Guess 1: who is squatting down is the batter, so hitter
Guess 2: catcher
Answer: catcher

Question: {}
Guess 1: {}
Guess 2: {}
Answer (remember, only 1-2 words):
91 changes: 65 additions & 26 deletions vision_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ def list_processes(cls):
"""
return [cls.name]


# ------------------------------ Specific models ---------------------------- #


Expand Down Expand Up @@ -381,7 +382,7 @@ def forward(self, image: torch.Tensor, text: List[str], return_labels: bool = Fa
text = [text]
text_original = text
text = ['a photo of a ' + t for t in text]
inputs = self.processor(text=text, images=image, return_tensors="pt") # padding="longest",
inputs = self.processor(text=text, images=image, return_tensors="pt") # padding="longest",
inputs = {k: v.to(self.dev) for k, v in inputs.items()}
outputs = self.model(**inputs)

Expand Down Expand Up @@ -512,7 +513,7 @@ def compute_prediction(self, original_image, original_caption, custom_entity=Non
tic = timeit.time.perf_counter()

# compute predictions
with HiddenPrints(): # Hide some deprecated notices
with HiddenPrints(): # Hide some deprecated notices
predictions = self.model(image_list, captions=[original_caption],
positive_map=positive_map_label_to_token)
predictions = [o.to(self.cpu_device) for o in predictions]
Expand Down Expand Up @@ -779,6 +780,8 @@ def __init__(self, gpu_number=0):
super().__init__(gpu_number=gpu_number)
with open(config.gpt3.qa_prompt) as f:
self.qa_prompt = f.read().strip()
with open(config.gpt3.guess_prompt) as f:
self.guess_prompt = f.read().strip()
self.temperature = config.gpt3.temperature
self.n_votes = config.gpt3.n_votes
self.model = config.gpt3.model
Expand All @@ -802,7 +805,40 @@ def most_frequent(answers):
answer_counts = Counter(answers)
return answer_counts.most_common(1)[0][0]

def get_qa(self, prompts, prompt_base: str=None) -> list[str]:
def process_guesses(self, prompts):
    """
    Pick one short final answer per question from a pair of candidate guesses.

    Parameters
    ----------
    prompts : list
        One `(question, guess1, guess2)` triple per query. `guess1` is normally a sequence of guesses: its
        first two entries are used, and a single-entry sequence is duplicated. `guess1` may also be a plain
        string, in which case `guess2` is used as the second guess (or `guess1` again if `guess2` is None).

    Returns
    -------
    list[str]
        One answer per entry of `prompts`, with leading whitespace stripped. When `self.n_votes > 1` the
        answer is the most frequent one among the `n_votes` consecutive choices belonging to that prompt.
    """
    prompt_base = self.guess_prompt
    prompts_total = []
    for question, guess1, guess2 in prompts:
        if isinstance(guess1, str):
            # A bare string must not be indexed: guess1[0] / guess1[1] would be its first two characters.
            guesses = [guess1, guess1 if guess2 is None else guess2]
        elif len(guess1) == 1:
            # In case only one option is given as a guess
            guesses = [guess1[0], guess1[0]]
        else:
            guesses = guess1
        prompts_total.append(prompt_base.format(question, guesses[0], guesses[1]))

    response = self.process_guesses_fn(prompts_total)

    # Chat models nest the text under 'message'/'content'; completion models expose it as 'text'.
    if self.model == 'chatgpt':
        texts = [choice['message']['content'] for choice in response['choices']]
    else:
        texts = [choice['text'] for choice in response['choices']]

    if self.n_votes > 1:
        # Choices are laid out prompt by prompt, n_votes consecutive entries each.
        n = self.n_votes
        return [self.most_frequent(texts[i * n:(i + 1) * n]).lstrip() for i in range(len(prompts))]
    return [text.lstrip() for text in texts]

def process_guesses_fn(self, prompt):
    """
    Query the configured model with the already formatted guess prompt(s) and return its raw response.

    The request allows at most 5 tokens and stops at a newline or at the end-of-text marker.
    Per the original author's note, this mirrors get_qa_fn and is kept separate so it can diverge later.
    """
    request_options = dict(
        model=self.model,
        max_tokens=5,
        logprobs=1,
        stream=False,
        stop=["\n", "<|endoftext|>"],
    )
    return self.query_gpt3(prompt, **request_options)

def get_qa(self, prompts, prompt_base: str = None) -> list[str]:
if prompt_base is None:
prompt_base = self.qa_prompt
prompts_total = []
Expand All @@ -814,8 +850,8 @@ def get_qa(self, prompts, prompt_base: str=None) -> list[str]:
response_ = []
for i in range(len(prompts)):
if self.model == 'chatgpt':
resp_i = [r['message']['content']
for r in response['choices'][i * self.n_votes:(i + 1) * self.n_votes]]
resp_i = [r['message']['content'] for r in
response['choices'][i * self.n_votes:(i + 1) * self.n_votes]]
else:
resp_i = [r['text'] for r in response['choices'][i * self.n_votes:(i + 1) * self.n_votes]]
response_.append(self.most_frequent(resp_i))
Expand Down Expand Up @@ -891,6 +927,8 @@ def forward(self, prompt, process_name):
if len(prompt) > 0:
if process_name == 'gpt3_qa':
response = self.get_qa(prompt)
elif process_name == 'gpt3_guess':
response = self.process_guesses(prompt)
else: # 'gpt3_general', general prompt, has to be given all of it
response = self.get_general(prompt)
else:
Expand All @@ -911,7 +949,7 @@ def forward(self, prompt, process_name):

@classmethod
def list_processes(cls):
    """
    Return the names of the processes served by this model: 'gpt3_qa', 'gpt3_guess' and 'gpt3_general'.
    """
    # The stale two-entry return (without 'guess') used to come first and made this one unreachable.
    return ['gpt3_' + n for n in ['qa', 'guess', 'general']]


# @cache.cache
Expand All @@ -924,24 +962,26 @@ def codex_helper(extended_prompt):
if not isinstance(extended_prompt, list):
extended_prompt = [extended_prompt]
responses = [openai.ChatCompletion.create(
model=config.codex.model,
messages=[
# {"role": "system", "content": "You are a helpful assistant."},
{"role": "system", "content": "Only answer with a function starting def execute_command."},
{"role": "user", "content": prompt}
],
temperature=config.codex.temperature,
max_tokens=config.codex.max_tokens,
top_p = 1.,
frequency_penalty=0,
presence_penalty=0,
# best_of=config.codex.best_of,
stop=["\n\n"],
)
for prompt in extended_prompt]
resp = [r['choices'][0]['message']['content'].replace("execute_command(image)", "execute_command(image, my_fig, time_wait_between_lines, syntax)") for r in responses]
# if len(resp) == 1:
# resp = resp[0]
model=config.codex.model,
messages=[
# {"role": "system", "content": "You are a helpful assistant."},
{"role": "system", "content": "Only answer with a function starting def execute_command."},
{"role": "user", "content": prompt}
],
temperature=config.codex.temperature,
max_tokens=config.codex.max_tokens,
top_p=1.,
frequency_penalty=0,
presence_penalty=0,
# best_of=config.codex.best_of,
stop=["\n\n"],
)
for prompt in extended_prompt]
resp = [r['choices'][0]['message']['content'].replace("execute_command(image)",
"execute_command(image, my_fig, time_wait_between_lines, syntax)")
for r in responses]
# if len(resp) == 1:
# resp = resp[0]
else:
warnings.warn('OpenAI Codex is deprecated. Please use GPT-4 or GPT-3.5-turbo.')
response = openai.Completion.create(
Expand Down Expand Up @@ -1161,7 +1201,7 @@ def caption(self, image, prompt=None):
generated_text = [cap.strip() for cap in
self.processor.batch_decode(generated_ids, skip_special_tokens=True)]
return generated_text

def pre_question(self, question):
# from LAVIS blip_processors
question = re.sub(
Expand Down Expand Up @@ -1223,7 +1263,6 @@ class SaliencyModel(BaseModel):

def __init__(self, gpu_number=0,
path_checkpoint=f'{config.path_pretrained_models}/saliency_inspyrenet_plus_ultra'):

from base_models.inspyrenet.saliency_transforms import get_transform
from base_models.inspyrenet.InSPyReNet import InSPyReNet
from base_models.inspyrenet.backbones.SwinTransformer import SwinB
Expand Down

0 comments on commit ac2fa26

Please sign in to comment.