From d5eaec5d90ff7a2cfce1b43504f279b321363eb6 Mon Sep 17 00:00:00 2001
From: acer-king
Date: Fri, 13 Sep 2024 02:28:03 -0700
Subject: [PATCH 01/80] refactor weight setter and validators

---
 .gitignore                                 |   3 +-
 miner/services/__init__.py                 |   3 +-
 .../services/validators/base_validator.py  |  34 +-
 .../services/validators/image_validator.py |  31 +-
 .../services/validators/text_validator.py  |  52 +--
 validators/weight_setter.py                | 365 ++++++++++++------
 6 files changed, 276 insertions(+), 212 deletions(-)

diff --git a/.gitignore b/.gitignore
index e8cec8aa..726492f6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,4 +13,5 @@ wandb/
 validators/.ipynb_checkpoints/
 **/validator.ipynb
 **/.env
-**/Cortex.t.egg-info
\ No newline at end of file
+**/Cortex.t.egg-info
+**/test.ipynb
\ No newline at end of file
diff --git a/miner/services/__init__.py b/miner/services/__init__.py
index 0823ea8d..f7388167 100644
--- a/miner/services/__init__.py
+++ b/miner/services/__init__.py
@@ -2,9 +2,8 @@
 from .prompt import PromptService
 from .image import ImageService
 from .embedding import EmbeddingService
-from .text import TextService
 from .check_status import IsAliveService
 from .capacity import CapacityService
 
-ALL_SERVICE_TYPE = Union[PromptService, ImageService, EmbeddingService, TextService, IsAliveService, CapacityService]
+ALL_SERVICE_TYPE = Union[PromptService, ImageService, EmbeddingService, IsAliveService, CapacityService]
 __all__ = [PromptService, ImageService, EmbeddingService, CapacityService, ALL_SERVICE_TYPE]
diff --git a/validators/services/validators/base_validator.py b/validators/services/validators/base_validator.py
index 50e7d7cf..7974f08b 100644
--- a/validators/services/validators/base_validator.py
+++ b/validators/services/validators/base_validator.py
@@ -7,7 +7,6 @@
 import bittensor as bt
 
 from cortext.metaclasses import ValidatorRegistryMeta
-from cortext import utils
 
 dataset = None
@@ -28,37 +27,6 @@ def __init__(self, config, metagraph):
         self.num_samples = 100
         self.wandb_data = {}
 
-    def get_random_texts(self) -> list[str]:
-        global dataset
-        if dataset is None:
-            dataset = load_dataset('wikitext', 'wikitext-2-v1')
-        texts = [item['text'] for item in dataset['train']]
-        return random.sample(texts, self.num_samples)
-
-    async def load_questions(self, available_uids, item_type: str = "text", vision=False):
-        self.uid_to_questions = dict()
-
-        for index, uid in enumerate(available_uids):
-
-            if item_type == "images":
-                content = await utils.get_question("images", len(available_uids))
-                self.uid_to_questions[uid] = content  # Store messages for each UID
-            elif item_type == "text":
-                question = await utils.get_question("text", len(available_uids), vision)
-                if isinstance(question, str):
-                    bt.logging.info(f"Question is str, dict expected: {question}")
-                prompt = question.get("prompt")
-                image_url = question.get("image")
-                self.uid_to_questions[uid] = {"prompt": prompt}
-                self.uid_to_questions[uid]["image"] = image_url
-            else:
-                random_texts = self.get_random_texts()
-                num_texts_per_uid = len(random_texts) // len(available_uids)
-                start_index = index * num_texts_per_uid
-                end_index = start_index + num_texts_per_uid
-                prompt = random_texts[start_index:end_index]
-                self.uid_to_questions[uid] = prompt
-
     async def query_miner(self, metagraph, uid, syn):
         try:
             responses = await self.dendrite([metagraph.axons[uid]], syn, deserialize=False, timeout=self.timeout,
@@ -75,7 +43,7 @@ async def handle_response(self, uid, response) -> Tuple[int, bt.Synapse]:
         return uid, response
 
     @abstractmethod
-    async def start_query(self, available_uids: List[int]) -> bt.Synapse:
+    async def create_query(self, uid):
         pass
 
     @abstractmethod
diff --git a/validators/services/validators/image_validator.py b/validators/services/validators/image_validator.py
index 0bcd2432..e42b1b15 100644
--- a/validators/services/validators/image_validator.py
+++ b/validators/services/validators/image_validator.py
@@ -8,6 +8,7 @@
 from validators.services.validators.base_validator import BaseValidator
 from validators import utils
 from validators.utils import error_handler
+from cortext.utils import get_question
 import bittensor as bt
 
 
@@ -48,26 +49,16 @@ def select_random_provider_and_model(self):
         elif self.provider == "OpenAI":
             self.model = "dall-e-3"
 
-    async def start_query(self, available_uids):
-        try:
-            query_tasks = []
+    async def get_question(self):
+        question = await get_question("image", 1)
+        return question
 
-            self.select_random_provider_and_model()
-            await self.load_questions(available_uids, "images")
-
-            # Query all images concurrently
-            for uid, content in self.uid_to_questions.items():
-                syn = ImageResponse(messages=content, model=self.model, size=self.size, quality=self.quality,
-                                    style=self.style, provider=self.provider, seed=self.seed, steps=self.steps)
-                bt.logging.info(f"uid = {uid}, syn = {syn}")
-                task = self.query_miner(self.metagraph, uid, syn)
-                query_tasks.append(task)
-
-            # Query responses is (uid. syn)
-            query_responses = await asyncio.gather(*query_tasks)
-            return query_responses
-        except:
-            bt.logging.error(f"error in start_query {traceback.format_exc()}")
+    async def create_query(self, uid) -> bt.Synapse:
+        question = await self.get_question()
+        syn = ImageResponse(messages=question, model=self.model, size=self.size, quality=self.quality,
+                            style=self.style, provider=self.provider, seed=self.seed, steps=self.steps)
+        bt.logging.info(f"uid = {uid}, syn = {syn}")
+        return syn
 
     def should_i_score(self):
         rand = random.random()
@@ -82,7 +73,7 @@ async def get_scoring_task(self, uid, answer, response: ImageResponse):
             return 0
             image_url = completion["url"]
             score = await cortext.reward.dalle_score(uid, image_url, self.size, response.messages,
-                                                    self.weight)
+                                                     self.weight)
         else:
             score = 0  # cortext.reward.deterministic_score(uid, syn, self.weight)
         return score
diff --git a/validators/services/validators/text_validator.py b/validators/services/validators/text_validator.py
index e739e3f9..d6546828 100644
--- a/validators/services/validators/text_validator.py
+++ b/validators/services/validators/text_validator.py
@@ -17,6 +17,7 @@
 class TextValidator(BaseValidator):
     gen_should_i_score = get_should_i_score_arr_for_text()
 
+
     def __init__(self, config, provider: str = None, model: str = None, metagraph=None):
         super().__init__(config, metagraph)
         self.streaming = True
@@ -82,39 +83,24 @@ async def handle_response(self, uid: str, responses) -> tuple[str, str]:
                 break
         return uid, full_response
 
-    async def start_query(self, available_uids):
-        try:
-            self.select_random_provider_and_model()
-            is_vision_model = self.model in constants.VISION_MODELS
-            await self.load_questions(available_uids, "text", is_vision_model)
-
-            query_tasks = []
-            bt.logging.trace(f"provider = {self.provider} model = {self.model}")
-            for uid, question in self.uid_to_questions.items():
-                prompt = question.get("prompt")
-                image = question.get("image")
-                if image:
-                    messages = [{'role': 'user', 'content': prompt, "image": image}]
-                else:
-                    messages = [{'role': 'user', 'content': prompt}]
-
-                syn = StreamPrompting(messages=messages, model=self.model, seed=self.seed, max_tokens=self.max_tokens,
-                                      temperature=self.temperature, provider=self.provider, top_p=self.top_p,
-                                      top_k=self.top_k)
-
-                image = image if image else ''
-                bt.logging.info(
-                    f"Sending {syn.model} {self.query_type} request to uid: {uid}, "
-                    f"timeout {self.timeout}: {syn.messages[0]['content']} {image}"
-                )
-                task = self.query_miner(self.metagraph, uid, syn)
-                query_tasks.append(task)
-
-            query_responses = await asyncio.gather(*query_tasks)
-
-            return query_responses
-        except Exception as err:
-            bt.logging.exception(err)
+    async def get_question(self):
+        is_vision_model = self.model in constants.VISION_MODELS
+        question = await get_question("text", 1, is_vision_model)
+        return question
+
+    async def create_query(self, uid) -> bt.Synapse:
+        question = await self.get_question()
+        prompt = question.get("prompt")
+        image = question.get("image")
+        if image:
+            messages = [{'role': 'user', 'content': prompt, "image": image}]
+        else:
+            messages = [{'role': 'user', 'content': prompt}]
+
+        syn = StreamPrompting(messages=messages, model=self.model, seed=self.seed, max_tokens=self.max_tokens,
+                              temperature=self.temperature, provider=self.provider, top_p=self.top_p,
+                              top_k=self.top_k)
+        return syn
 
     def select_random_provider_and_model(self):
         # AnthropicBedrock should only be used if a validators' anthropic account doesn't work
diff --git a/validators/weight_setter.py b/validators/weight_setter.py
index 8c1d52ab..963fa861 100644
--- a/validators/weight_setter.py
+++ b/validators/weight_setter.py
@@ -6,7 +6,6 @@
 from substrateinterface import SubstrateInterface
 from functools import partial
 from typing import Tuple
-import wandb
 
 import bittensor as bt
 import cortext
@@ -15,17 +14,14 @@
 from cortext.protocol import IsAlive, StreamPrompting, ImageResponse, Embeddings
 from cortext.metaclasses import ValidatorRegistryMeta
-from validators.services import BaseValidator, TextValidator, CapacityService
+from validators.services import CapacityService
 
 scoring_organic_timeout = 60
 
 
 class WeightSetter:
     def __init__(self, config):
-        self.uid_to_capacity = {}
         self.available_uids = None
-        self.NUM_QUERIES_PER_UID = 10
-        self.remaining_queries = []
         bt.logging.info("Initializing WeightSetter")
         self.config = config
         self.wallet = config.wallet
@@ -36,11 +32,19 @@ def __init__(self, config):
         self.my_uid = self.metagraph.hotkeys.index(self.wallet.hotkey.ss58_address)
         bt.logging.info(f"Running validator on subnet: {self.netuid} with uid: {self.my_uid}")
 
-        # Initialize scores
+        # Scoring and querying parameters
+        self.MIN_SCORED_QUERIES = 10  # Minimum number of times each UID should be scored per epoch
+        self.scoring_percent = 0.1  # Percentage of total queries that will be scored
+        self.TOTAL_QUERIES_PER_UID = int(self.MIN_SCORED_QUERIES / self.scoring_percent)
+        bt.logging.info(f"Each UID will receive {self.TOTAL_QUERIES_PER_UID} total queries, "
+                        f"with {self.MIN_SCORED_QUERIES} of them being scored.")
+
+        # Initialize scores and counts
         self.total_scores = {}
-        self.score_counts = {}
+        self.score_counts = {}  # Number of times a UID has been scored
+        self.total_queries_sent = {}  # Total queries sent to each UID
         self.moving_average_scores = None
-
+
         # Set up axon and dendrite
         self.axon = bt.axon(wallet=self.wallet, config=self.config)
         bt.logging.info(f"Axon server started on port {self.config.axon.port}")
@@ -49,11 +53,9 @@ def __init__(self, config):
         # Set up async-related attributes
         self.lock = asyncio.Lock()
         self.loop = asyncio.get_event_loop()
-        self.request_timestamps = {}
-        self.organic_scoring_tasks = set()
 
-        # Initialize prompt cache
-        self.prompt_cache = {}
+        # Initialize shared query database
+        self.query_database = []
 
         # Get network tempo
         self.tempo = self.subtensor.tempo(self.netuid)
@@ -61,8 +63,9 @@ def __init__(self, config):
 
         # Set up async tasks
         self.thread_executor = concurrent.futures.ThreadPoolExecutor(thread_name_prefix='asyncio')
-        self.loop.create_task(self.consume_organic_scoring())
-        self.loop.create_task(self.perform_synthetic_scoring_and_update_weights())
+        self.loop.create_task(self.consume_organic_queries())
+        self.loop.create_task(self.perform_synthetic_queries())
+        self.loop.create_task(self.process_queries_from_database())
 
     async def run_sync_in_async(self, fn):
         return await self.loop.run_in_executor(self.thread_executor, fn)
@@ -80,10 +83,10 @@ def get_last_update(self, block):
         except Exception as err:
             bt.logging.error(f"Error getting last update: {traceback.format_exc()}")
             bt.logging.exception(err)
-            # means that the validator is not registered yet. The validator should break if this is the case anyways
+            # Means that the validator is not registered yet.
             last_update_blocks = 1000
 
-        bt.logging.trace(f"last set weights successfully {last_update_blocks} blocks ago")
+        bt.logging.trace(f"Last set weights successfully {last_update_blocks} blocks ago")
         return last_update_blocks
 
     def get_blocks_til_epoch(self, block):
@@ -95,32 +98,51 @@ async def refresh_metagraph(self):
     async def initialize_uids_and_capacities(self):
         self.available_uids = await self.get_available_uids()
         bt.logging.info(f"Available UIDs: {list(self.available_uids.keys())}")
-        # self.uid_to_capacity = await self.get_capacities_for_uids(self.available_uids)
-        # bt.logging.info(f"Capacities for miners: {self.uid_to_capacity}")
+        self.uid_to_capacity = await self.get_capacities_for_uids(self.available_uids)
+        bt.logging.info(f"Capacities for miners: {self.uid_to_capacity}")
+        # Initialize total_scores, score_counts, and total_queries_sent
         self.total_scores = {uid: 0.0 for uid in self.available_uids.keys()}
         self.score_counts = {uid: 0 for uid in self.available_uids.keys()}
-        self.remaining_queries = self.shuffled(list(self.available_uids.keys()) * self.NUM_QUERIES_PER_UID)
+        self.total_queries_sent = {uid: 0 for uid in self.available_uids.keys()}
 
     async def update_and_refresh(self, last_update):
-        bt.logging.info(f"setting weights, last update {last_update} blocks ago")
+        bt.logging.info(f"Setting weights, last update {last_update} blocks ago")
         await self.update_weights()
 
         bt.logging.info("Refreshing metagraph...")
         await self.refresh_metagraph()
 
         bt.logging.info("Refreshing available UIDs...")
-        self.available_uids = await self.get_available_uids()
-        bt.logging.info(f"Available UIDs: {list(self.available_uids.keys())}")
-
-        # bt.logging.info("Refreshing capacities...")
-        # self.uid_to_capacity = await self.get_capacities_for_uids(self.available_uids)
-
-        self.total_scores = {uid: 0 for uid in self.available_uids.keys()}
-        self.score_counts = {uid: 0 for uid in self.available_uids.keys()}
-        self.remaining_queries = self.shuffled(list(self.available_uids.keys()) * self.NUM_QUERIES_PER_UID)
-
-
-    async def perform_synthetic_scoring_and_update_weights(self):
+        new_available_uids = await self.get_available_uids()
+        bt.logging.info(f"Available UIDs: {list(new_available_uids.keys())}")
+
+        bt.logging.info("Refreshing capacities...")
+        self.uid_to_capacity = await self.get_capacities_for_uids(new_available_uids)
+
+        # Update total_scores, score_counts, and total_queries_sent
+        # Remove UIDs that are no longer available
+        for uid in list(self.total_scores.keys()):
+            if uid not in new_available_uids:
+                del self.total_scores[uid]
+                del self.score_counts[uid]
+                del self.total_queries_sent[uid]
+
+        # Add new UIDs
+        for uid in new_available_uids:
+            if uid not in self.total_scores:
+                self.total_scores[uid] = 0.0
+                self.score_counts[uid] = 0
+                self.total_queries_sent[uid] = 0
+
+        # Reset counts for new epoch
+        for uid in self.total_scores.keys():
+            self.total_scores[uid] = 0.0
+            self.score_counts[uid] = 0
+            self.total_queries_sent[uid] = 0
+
+        self.available_uids = new_available_uids
+
+    async def perform_synthetic_queries(self):
         while True:
             if self.available_uids is None:
                 await self.initialize_uids_and_capacities()
@@ -130,37 +152,86 @@ async def perform_synthetic_queries(self):
             if last_update >= self.tempo * 2 or (
                     self.get_blocks_til_epoch(current_block) < 10 and last_update >= self.weights_rate_limit):
                 await self.update_and_refresh(last_update)
 
-            if not self.remaining_queries:
-                bt.logging.info("No more queries to perform until next epoch.")
-                continue
-            bt.logging.debug(f"not setting weights, last update {last_update} blocks ago, "
-                             f"{self.get_blocks_til_epoch(current_block)} blocks til epoch")
-
-            selected_validator = self.select_validator()
-            num_uids_to_query = min(self.config.max_miners_cnt, len(self.remaining_queries))
+            # Decide which UIDs to query, considering total queries sent
+            async with self.lock:
+                # Select UIDs that have not reached TOTAL_QUERIES_PER_UID
+                uids_to_query = [uid for uid in self.available_uids
+                                 if self.total_queries_sent[uid] < self.TOTAL_QUERIES_PER_UID]
 
-            # Pop UIDs to query from the remaining_queries list
-            uids_to_query = [self.remaining_queries.pop() for _ in range(num_uids_to_query)]
-            uid_to_scores = await self.process_modality(selected_validator, uids_to_query)
+                if not uids_to_query:
+                    bt.logging.info("All UIDs have received the maximum number of total queries.")
+                    await asyncio.sleep(1)
+                    continue
 
-            bt.logging.info(f"Remaining queries: {len(self.remaining_queries)}")
+                # Prioritize UIDs with least total_queries_sent
+                uids_to_query.sort(key=lambda uid: self.total_queries_sent[uid])
 
-            if uid_to_scores is None:
-                bt.logging.trace("uid_to_scores is None.")
-                continue
+                # Limit the number of UIDs to query based on configuration
+                num_uids_to_query = min(self.config.max_miners_cnt, len(uids_to_query))
+                uids_to_query = uids_to_query[:num_uids_to_query]
 
-            for uid, score in uid_to_scores.items():
-                async with self.lock:
-                    self.total_scores[uid] += score
-                    self.score_counts[uid] += 1
+            selected_validator = self.select_validator()
+
+            # Perform synthetic queries
+            query_responses = await self.perform_queries(selected_validator, uids_to_query)
+
+            # Store queries and responses in the shared database
+            async with self.lock:
+                for uid, response_data in query_responses:
+                    # Update total_queries_sent
+                    self.total_queries_sent[uid] += 1
+
+                    # Decide whether to score this query
+                    if self.should_i_score():
+                        self.query_database.append({
+                            'uid': uid,
+                            'synapse': response_data['query'],
+                            'response': response_data['response'],
+                            'query_type': 'synthetic',
+                            'timestamp': asyncio.get_event_loop().time(),
+                            'validator': selected_validator
+                        })
+                    # If not scoring, we can still log the query if needed
+
+            bt.logging.info(f"Performed synthetic queries for UIDs: {uids_to_query}")
 
             # Slow down the validator steps if necessary
             await asyncio.sleep(1)
 
+    def should_i_score(self):
+        # Randomly decide whether to score this query based on scoring_percent
+        return random.random() < self.scoring_percent
+
+    async def perform_queries(self, selected_validator, uids_to_query):
+        query_responses = []
+        for uid in uids_to_query:
+            try:
+                query_syn = await selected_validator.create_query(uid)
+                response = await self.query_miner(uid, query_syn)
+                query_responses.append((uid, {'query': query_syn, 'response': response}))
+            except Exception as e:
+                bt.logging.error(f"Exception during query for uid {uid}: {e}")
+                continue
+        return query_responses
+
+    async def query_miner(self, uid, synapse):
+        try:
+            axon = self.metagraph.axons[uid]
+            responses = await self.dendrite(
+                axons=[axon],
+                synapse=synapse,
+                deserialize=False,
+                timeout=synapse.timeout,
+                streaming=False,
+            )
+            # Handle the response appropriately
+            return responses[0]  # Assuming responses is a list
+        except Exception as e:
+            bt.logging.error(f"Exception during query for uid {uid}: {e}")
+            return None
+
     def select_validator(self):
         rand = random.random()
         text_validator = ValidatorRegistryMeta.get_class('TextValidator')(config=self.config, metagraph=self.metagraph)
         image_validator = ValidatorRegistryMeta.get_class('ImageValidator')(config=self.config,
@@ -205,44 +276,19 @@ async def check_uid(self, axon, uid):
             bt.logging.error(f"Error checking UID {uid}: {err}")
             return None
 
-    @staticmethod
-    def shuffled(list_: list) -> list:
-        list_ = list_.copy()
-        random.shuffle(list_)
-        return list_
-
-    async def process_modality(self, selected_validator: BaseValidator, available_uids):
-        if not available_uids:
-            bt.logging.info("No available uids.")
-            return None
-        bt.logging.info(f"starting query {selected_validator.__class__.__name__} for miners {available_uids}")
-        query_responses = await selected_validator.start_query(available_uids)
-
-        if not selected_validator.should_i_score():
-            bt.logging.info("we don't score this time.")
-            return None
-
-        bt.logging.debug(f"scoring query with query responses for "
-                         f"these uids: {available_uids}")
-        uid_scores_dict, scored_responses, responses = await selected_validator.score_responses(query_responses)
-        wandb_data = await selected_validator.build_wandb_data(uid_scores_dict, responses)
-        if self.config.wandb_on and not wandb_data:
-            wandb.log(wandb_data)
-            bt.logging.success("wandb_log successful")
-        return uid_scores_dict
-
     async def update_weights(self):
-        """Update weights based on average scores, using min-max normalization."""
+        """Update weights based on average scores."""
        bt.logging.info("Updating weights...")
         avg_scores = {}
 
         # Compute average scores per UID
-        for uid in self.total_scores:
-            count = self.score_counts[uid]
-            if count > 0:
-                avg_scores[uid] = self.total_scores[uid] / count
-            else:
-                avg_scores[uid] = 0.0
+        async with self.lock:
+            for uid in self.total_scores:
+                count = self.score_counts[uid]
+                if count > 0:
+                    avg_scores[uid] = self.total_scores[uid] / count
+                else:
+                    avg_scores[uid] = 0.0
 
         bt.logging.info(f"Average scores = {avg_scores}")
 
@@ -254,7 +300,7 @@ async def update_weights(self):
         await self.set_weights(weights)
 
     async def set_weights(self, scores):
-        # alpha of .3 means that each new score replaces 30% of the weight of the previous weights
+        # Alpha of .3 means that each new score replaces 30% of the weight of the previous weights
         alpha = .3
         if self.moving_average_scores is None:
             self.moving_average_scores = scores.clone()
@@ -306,42 +352,58 @@ def base_blacklist(self, synapse, blacklist_amt=20000) -> Tuple[bool, str]:
             bt.logging.exception(err)
 
     async def images(self, synapse: ImageResponse) -> ImageResponse:
-        bt.logging.info(f"received {synapse}")
-
-        synapse = await self.dendrite(self.metagraph.axons[synapse.uid], synapse, deserialize=False,
-                                      timeout=synapse.timeout)
-
-        bt.logging.info(f"new synapse = {synapse}")
-        return synapse
+        bt.logging.info(f"Received {synapse}")
+
+        axon = self.metagraph.axons[synapse.uid]
+        synapse_response = await self.dendrite(axon, synapse, deserialize=False,
+                                               timeout=synapse.timeout)
+
+        bt.logging.info(f"New synapse = {synapse_response}")
+        # Store the query and response in the shared database
+        async with self.lock:
+            self.query_database.append({
+                'uid': synapse.uid,
+                'synapse': synapse,
+                'response': synapse_response,
+                'query_type': 'organic',
+                'timestamp': asyncio.get_event_loop().time(),
+                'validator': ValidatorRegistryMeta.get_class('ImageValidator')(config=self.config, metagraph=self.metagraph)
+            })
+            # Update total_queries_sent
+            self.total_queries_sent[synapse.uid] += 1
+
+        return synapse_response
 
     async def embeddings(self, synapse: Embeddings) -> Embeddings:
-        bt.logging.info(f"received {synapse}")
-
-        synapse = await self.dendrite(self.metagraph.axons[synapse.uid], synapse, deserialize=False,
-                                      timeout=synapse.timeout)
-
-        bt.logging.info(f"new synapse = {synapse}")
-        return synapse
+        bt.logging.info(f"Received {synapse}")
+
+        axon = self.metagraph.axons[synapse.uid]
+        synapse_response = await self.dendrite(axon, synapse, deserialize=False,
+                                               timeout=synapse.timeout)
+
+        bt.logging.info(f"New synapse = {synapse_response}")
+        # Store the query and response in the shared database
+        async with self.lock:
+            self.query_database.append({
+                'uid': synapse.uid,
+                'synapse': synapse,
+                'response': synapse_response,
+                'query_type': 'organic',
+                'timestamp': asyncio.get_event_loop().time(),
+                'validator': ValidatorRegistryMeta.get_class('EmbeddingsValidator')(config=self.config, metagraph=self.metagraph)
+            })
+            # Update total_queries_sent
+            self.total_queries_sent[synapse.uid] += 1
+
+        return synapse_response
 
     async def prompt(self, synapse: StreamPrompting) -> StreamPrompting:
-        bt.logging.info(f"received {synapse}")
+        bt.logging.info(f"Received {synapse}")
 
-        # Return the streaming response as before
+        # Return the streaming response
         async def _prompt(synapse, send: Send):
             bt.logging.info(f"Sending {synapse} request to uid: {synapse.uid}")
 
-            async def handle_response(responses):
-                for resp in responses:
-                    async for chunk in resp:
-                        if isinstance(chunk, str):
-                            await send({
-                                "type": "http.response.body",
-                                "body": chunk.encode("utf-8"),
-                                "more_body": True,
-                            })
-                            bt.logging.info(f"Streamed text: {chunk}")
-                await send({"type": "http.response.body", "body": b'', "more_body": False})
-
             axon = self.metagraph.axons[synapse.uid]
             responses = await self.dendrite(
                 axons=[axon],
@@ -350,12 +412,39 @@ async def handle_response(responses):
                 timeout=synapse.timeout,
                 streaming=True,
             )
-            return await handle_response(responses)
+
+            response_text = ''
+
+            for resp in responses:
+                async for chunk in resp:
+                    if isinstance(chunk, str):
+                        await send({
+                            "type": "http.response.body",
+                            "body": chunk.encode("utf-8"),
+                            "more_body": True,
+                        })
+                        bt.logging.info(f"Streamed text: {chunk}")
+                        response_text += chunk
+
+            await send({"type": "http.response.body", "body": b'', "more_body": False})
+
+            # Store the query and response in the shared database
+            async with self.lock:
+                self.query_database.append({
+                    'uid': synapse.uid,
+                    'synapse': synapse,
+                    'response': response_text,
+                    'query_type': 'organic',
+                    'timestamp': asyncio.get_event_loop().time(),
+                    'validator': ValidatorRegistryMeta.get_class('TextValidator')(config=self.config, metagraph=self.metagraph)
+                })
+                # Update total_queries_sent
+                self.total_queries_sent[synapse.uid] += 1
 
         token_streamer = partial(_prompt, synapse)
         return synapse.create_streaming_response(token_streamer)
 
-    async def consume_organic_scoring(self):
+    async def consume_organic_queries(self):
         bt.logging.info("Attaching forward function to axon.")
         self.axon.attach(
             forward_fn=self.prompt,
@@ -370,10 +459,40 @@ async def consume_organic_queries(self):
         self.axon.serve(netuid=self.netuid)
         self.axon.start()
         bt.logging.info(f"Running validator on uid: {self.my_uid}")
+
+    async def process_queries_from_database(self):
         while True:
-            try:
-                # Check for organic scoring tasks here
-                await asyncio.sleep(60)
-            except Exception as err:
-                bt.logging.exception(err)
-                await asyncio.sleep(10)
\ No newline at end of file
+            await asyncio.sleep(1)  # Adjust the sleep time as needed
+            async with self.lock:
+                if not self.query_database:
+                    continue
+                # Copy queries to process and clear the database
+                queries_to_process = self.query_database.copy()
+                self.query_database.clear()
+
+            # Process queries outside the lock to prevent blocking
+            for query_data in queries_to_process:
+                uid = query_data['uid']
+                synapse = query_data['synapse']
+                response = query_data['response']
+                validator = query_data['validator']
+
+                # Prepare query response data in the format expected by the validator
+                query_responses = [(uid, {'query': synapse, 'response': response})]
+
+                # Score the response using the validator
+                try:
+                    uid_scores_dict, _, _ = await validator.score_responses(query_responses)
+                except Exception as e:
+                    bt.logging.error(f"Error scoring response for UID {uid}: {e}")
+                    continue
+
+                # Update total_scores and score_counts
+                async with self.lock:
+                    for uid, score in uid_scores_dict.items():
+                        self.total_scores[uid] += score
+                        self.score_counts[uid] += 1
+
+                        # Stop scoring if MIN_SCORED_QUERIES reached
+                        if self.score_counts[uid] >= self.MIN_SCORED_QUERIES:
+                            bt.logging.info(f"UID {uid} has reached the minimum number of scored queries.")
\ No newline at end of file

From d2bf4fc22f8e476d5328121770f4824329634bb2 Mon Sep 17 00:00:00 2001
From: acer-king
Date: Fri, 13 Sep 2024 07:53:28 -0700
Subject: [PATCH 02/80] fix: bugs after upgrade

---
 .../services/validators/base_validator.py  |  4 ++
 .../services/validators/image_validator.py |  2 +-
 .../services/validators/text_validator.py  | 11 ----
 validators/utils.py                        | 26 +++++++++
 validators/validator.py                    |  5 +-
 validators/weight_setter.py                | 54 +++++++++++--------
 6 files changed, 66 insertions(+), 36 deletions(-)

diff --git a/validators/services/validators/base_validator.py b/validators/services/validators/base_validator.py
index 7974f08b..fb82561e 100644
--- a/validators/services/validators/base_validator.py
+++ b/validators/services/validators/base_validator.py
@@ -37,6 +37,10 @@ async def query_miner(self, metagraph, uid, syn):
             bt.logging.error(f"Exception during query for uid {uid}: {e}")
             return uid, None
 
+    @abstractmethod
+    def select_random_provider_and_model(self):
+        pass
+
     async def handle_response(self, uid, response) -> Tuple[int, bt.Synapse]:
         if type(response) == list and response:
             response = response[0]
diff --git a/validators/services/validators/image_validator.py b/validators/services/validators/image_validator.py
index e42b1b15..04ee7426 100644
--- a/validators/services/validators/image_validator.py
+++ b/validators/services/validators/image_validator.py
@@ -50,7 +50,7 @@ def select_random_provider_and_model(self):
             self.model = "dall-e-3"
 
     async def get_question(self):
-        question = await get_question("image", 1)
+        question = await get_question("images", 1)
         return question
 
     async def create_query(self, uid) -> bt.Synapse:
diff --git a/validators/services/validators/text_validator.py b/validators/services/validators/text_validator.py
index d6546828..c7ba1d93 100644
--- a/validators/services/validators/text_validator.py
+++ b/validators/services/validators/text_validator.py
@@ -72,17 +72,6 @@ async def organic(self, metagraph, query: dict[str, list[dict[str, str]]]) -> As
             bt.logging.trace(resp)
             yield uid, resp
 
-    async def handle_response(self, uid: str, responses) -> tuple[str, str]:
-        full_response = ""
-        for resp in responses:
-            async for chunk in resp:
-                if isinstance(chunk, str):
-                    bt.logging.trace(chunk)
-                    full_response += chunk
-            bt.logging.trace(f"full_response for uid {uid}: {full_response}")
-            break
-        return uid, full_response
-
     async def get_question(self):
         is_vision_model = self.model in constants.VISION_MODELS
         question = await get_question("text", 1, is_vision_model)
diff --git a/validators/utils.py b/validators/utils.py
index b8ea92a6..3d318cb1 100644
--- a/validators/utils.py
+++ b/validators/utils.py
@@ -2,6 +2,7 @@
 import asyncio
 import base64
 import itertools
+import inspect
 
 import bittensor as bt
 from PIL import Image
@@ -39,6 +40,31 @@ async def wrapper(*args, **kwargs):
     return wrapper
 
 
+async def handle_response_stream(responses) -> tuple[str, str]:
+    full_response = ""
+    async for chunk in responses:
+        if isinstance(chunk, str):
+            bt.logging.trace(chunk)
+            full_response += chunk
+    return full_response
+
+
+def handle_response(func):
+    @wraps(func)
+    async def wrapper(*args, **kwargs):
+        try:
+            response = await func(*args, **kwargs)
+            if inspect.isasyncgen(response):
+                return await handle_response_stream(response)
+            else:
+                return response
+        except Exception as err:
+            bt.logging.exception(f"Exception during query for uid {args[1]}, {err}")
+            return None
+
+    return wrapper
+
+
 def get_should_i_score_arr_for_text():
     for i in itertools.count():
         yield (i % 5) != 0
diff --git a/validators/validator.py b/validators/validator.py
index 9ec4507f..a2bbc0a5 100644
--- a/validators/validator.py
+++ b/validators/validator.py
@@ -15,6 +15,7 @@
 load_dotenv()
 random.seed(time.time())
 
+
 class NestedNamespace(argparse.Namespace):
     def __setattr__(self, name, value):
         if '.' in name:
@@ -115,9 +116,11 @@ def main():
     Config.check_required_env_vars()
     args = parse_arguments()
     config = Config(args)
+
+    setup_logging(config)
+
     config.wallet = bt.wallet(name=config.wallet.name, hotkey=config.wallet.hotkey)
     config.dendrite = bt.dendrite(wallet=config.wallet)
-    setup_logging(config)
 
     bt.logging.info(f"Config: {vars(config)}")
diff --git a/validators/weight_setter.py b/validators/weight_setter.py
index 963fa861..d6d5dbef 100644
--- a/validators/weight_setter.py
+++ b/validators/weight_setter.py
@@ -14,7 +14,8 @@
 from cortext.protocol import IsAlive, StreamPrompting, ImageResponse, Embeddings
 from cortext.metaclasses import ValidatorRegistryMeta
-from validators.services import CapacityService
+from validators.services import CapacityService, BaseValidator
+from validators.utils import handle_response
 
 scoring_organic_timeout = 60
 
@@ -34,15 +35,15 @@ def __init__(self, config):
 
         # Scoring and querying parameters
         self.MIN_SCORED_QUERIES = 10  # Minimum number of times each UID should be scored per epoch
-        self.scoring_percent = 0.1 # Percentage of total queries that will be scored
+        self.scoring_percent = 0.1  # Percentage of total queries that will be scored
         self.TOTAL_QUERIES_PER_UID = int(self.MIN_SCORED_QUERIES / self.scoring_percent)
         bt.logging.info(f"Each UID will receive {self.TOTAL_QUERIES_PER_UID} total queries, "
                         f"with {self.MIN_SCORED_QUERIES} of them being scored.")
 
         # Initialize scores and counts
         self.total_scores = {}
-        self.score_counts = {} # Number of times a UID has been scored
-        self.total_queries_sent = {} # Total queries sent to each UID
+        self.score_counts = {}  # Number of times a UID has been scored
+        self.total_queries_sent = {}  # Total queries sent to each UID
         self.moving_average_scores = None
@@ -173,8 +174,10 @@ async def perform_synthetic_queries(self):
                 uids_to_query = uids_to_query[:num_uids_to_query]
 
             selected_validator = self.select_validator()
+            selected_validator.select_random_provider_and_model()
 
             # Perform synthetic queries
+            bt.logging.info("start querying to miners")
             query_responses = await self.perform_queries(selected_validator, uids_to_query)
@@ -216,23 +219,25 @@ async def perform_queries(self, selected_validator, uids_to_query):
                 continue
         return query_responses
 
+    @handle_response
     async def query_miner(self, uid, synapse):
-        try:
-            axon = self.metagraph.axons[uid]
-            responses = await self.dendrite(
-                axons=[axon],
-                synapse=synapse,
-                deserialize=False,
-                timeout=synapse.timeout,
-                streaming=False,
-            )
-            # Handle the response appropriately
-            return responses[0]  # Assuming responses is a list
-        except Exception as e:
-            bt.logging.error(f"Exception during query for uid {uid}: {e}")
-            return None
+        axon = self.metagraph.axons[uid]
+
+        streaming = False
+        if isinstance(synapse, bt.StreamingSynapse):
+            streaming = True
+
+        responses = await self.dendrite(
+            axons=[axon],
+            synapse=synapse,
+            deserialize=False,
+            timeout=synapse.timeout,
+            streaming=streaming,
+        )
+        # Handle the response appropriately
+        return responses[0]  # Assuming responses is a list
 
-    def select_validator(self):
+    def select_validator(self) -> BaseValidator:
         rand = random.random()
         text_validator = ValidatorRegistryMeta.get_class('TextValidator')(config=self.config, metagraph=self.metagraph)
         image_validator = ValidatorRegistryMeta.get_class('ImageValidator')(config=self.config,
@@ -367,7 +372,8 @@ async def images(self, synapse: ImageResponse) -> ImageResponse:
                 'response': synapse_response,
                 'query_type': 'organic',
                 'timestamp': asyncio.get_event_loop().time(),
-                'validator': ValidatorRegistryMeta.get_class('ImageValidator')(config=self.config, metagraph=self.metagraph)
+                'validator': ValidatorRegistryMeta.get_class('ImageValidator')(config=self.config,
+                                                                               metagraph=self.metagraph)
             })
             # Update total_queries_sent
             self.total_queries_sent[synapse.uid] += 1
@@ -390,7 +396,8 @@ async def embeddings(self, synapse: Embeddings) -> Embeddings:
                 'response': synapse_response,
                 'query_type': 'organic',
                 'timestamp': asyncio.get_event_loop().time(),
-                'validator': ValidatorRegistryMeta.get_class('EmbeddingsValidator')(config=self.config, metagraph=self.metagraph)
+                'validator': ValidatorRegistryMeta.get_class('EmbeddingsValidator')(config=self.config,
+                                                                                    metagraph=self.metagraph)
             })
             # Update total_queries_sent
             self.total_queries_sent[synapse.uid] += 1
@@ -436,7 +443,8 @@ async def _prompt(synapse, send: Send):
                     'response': response_text,
                     'query_type': 'organic',
                     'timestamp': asyncio.get_event_loop().time(),
-                    'validator': ValidatorRegistryMeta.get_class('TextValidator')(config=self.config, metagraph=self.metagraph)
+                    'validator': ValidatorRegistryMeta.get_class('TextValidator')(config=self.config,
+                                                                                  metagraph=self.metagraph)
                 })
                 # Update total_queries_sent
                 self.total_queries_sent[synapse.uid] += 1
@@ -495,4 +503,4 @@ async def process_queries_from_database(self):
 
                         # Stop scoring if MIN_SCORED_QUERIES reached
                         if self.score_counts[uid] >= self.MIN_SCORED_QUERIES:
-                            bt.logging.info(f"UID {uid} has reached the minimum number of scored queries.")
\ No newline at end of file
+                            bt.logging.info(f"UID {uid} has reached the minimum number of scored queries.")

From c8cbda3465444c153c991a5efeb1a4369b81fb0c Mon Sep 17 00:00:00 2001
From: acer-king
Date: Fri, 13 Sep 2024 09:08:27 -0700
Subject: [PATCH 03/80] bug fixes after upgrade

---
 validators/services/validators/base_validator.py  | 10 +++++-----
 validators/services/validators/image_validator.py |  2 +-
 validators/services/validators/text_validator.py  |  7 +++----
 3 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/validators/services/validators/base_validator.py b/validators/services/validators/base_validator.py
index fb82561e..d9f1e882 100644
--- a/validators/services/validators/base_validator.py
+++ b/validators/services/validators/base_validator.py
@@ -58,7 +58,7 @@ def should_i_score(self):
         return True
 
     @abstractmethod
-    async def get_answer_task(self, uid, synapse=None):
+    async def get_answer_task(self, uid, synapse, response):
         pass
 
     @abstractmethod
@@ -71,14 +71,14 @@ async def score_responses(self, responses):
         uid_scores_dict = {}
         scored_response = []
 
-        for uid, syn in responses:
-            task = self.get_answer_task(uid, syn)
+        for uid, query_resp in responses:
+            task = self.get_answer_task(uid, query_resp.get("query"), query_resp.get("response"))
             answering_tasks.append((uid, task))
 
         answers_results = await asyncio.gather(*[task for _, task in answering_tasks])
 
-        for (uid, response), answer in zip(responses, answers_results):
-            task = self.get_scoring_task(uid, answer, response)
+        for (uid, query_resp), answer in zip(responses, answers_results):
+            task = self.get_scoring_task(uid, answer, query_resp.get("response"))
             scoring_tasks.append((uid, task))
 
         # Await all scoring tasks
diff --git a/validators/services/validators/image_validator.py b/validators/services/validators/image_validator.py
index 04ee7426..8b869235 100644
--- a/validators/services/validators/image_validator.py
+++ b/validators/services/validators/image_validator.py
@@ -78,7 +78,7 @@ async def get_scoring_task(self, uid, answer, response: ImageResponse):
             score = 0  # cortext.reward.deterministic_score(uid, syn, self.weight)
         return score
 
-    async def get_answer_task(self, uid, synapse=None):
+    async def get_answer_task(self, uid, synapse: ImageResponse, response):
         return synapse
 
     @error_handler
diff --git a/validators/services/validators/text_validator.py b/validators/services/validators/text_validator.py
index c7ba1d93..fe4d2d22 100644
--- a/validators/services/validators/text_validator.py
+++ b/validators/services/validators/text_validator.py
@@ -156,10 +156,9 @@ async def call_api(self, prompt: str, image_url: Optional[str], provider: str) -
         else:
             bt.logging.error(f"provider {provider} not found")
 
-    async def get_answer_task(self, uid: int, syn=None):
-        question = self.uid_to_questions[uid]
-        prompt = question.get("prompt")
-        image_url = question.get("image")
+    async def get_answer_task(self, uid: int, query_syn: StreamPrompting, response):
+        prompt = query_syn.messages[0].get("content")
+        image_url = query_syn.messages[0].get("image")
         return await self.call_api(prompt, image_url, self.provider)
 
     async def get_scoring_task(self, uid, answer, response):

From cf684b63d5a4e8039b6a009cf80a51b9c40c3795 Mon Sep 17 00:00:00 2001
From: acer-king
Date: Fri, 13 Sep 2024 09:53:28 -0700
Subject: [PATCH 04/80] fix batching query to miners

---
 validators/weight_setter.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/validators/weight_setter.py b/validators/weight_setter.py
index d6d5dbef..4612a5d0 100644
--- a/validators/weight_setter.py
+++ b/validators/weight_setter.py
@@ -209,14 +209,19 @@ def should_i_score(self):
 
     async def perform_queries(self, selected_validator, uids_to_query):
         query_responses = []
-        for uid in uids_to_query:
+        tasks = []
+        async def query_miner_per_uid(uid):
             try:
                 query_syn = await selected_validator.create_query(uid)
                 response = await self.query_miner(uid, query_syn)
                 query_responses.append((uid, {'query': query_syn, 'response': response}))
             except Exception as e:
                 bt.logging.error(f"Exception during query for uid {uid}: {e}")
-                continue
+
+        for uid in uids_to_query:
+            task = query_miner_per_uid(uid)
+            tasks.append(task)
+        await asyncio.gather(*tasks)
         return query_responses
 
     @handle_response

From 1d245ceacd3108b92d242ee9ebe273d0621ad007 Mon Sep 17 00:00:00 2001
From: acer-king
Date: Fri, 13 Sep 2024 11:49:03 -0700
Subject: [PATCH 05/80] add batch querying and scoring

---
 .../services/validators/base_validator.py |  6 +-
 .../services/validators/text_validator.py |  4 +-
 validators/weight_setter.py               | 56 ++++++++++---------
 3 files changed, 37 insertions(+), 29 deletions(-)

diff --git a/validators/services/validators/base_validator.py b/validators/services/validators/base_validator.py
index d9f1e882..ba616e5d 100644
--- a/validators/services/validators/base_validator.py
+++ b/validators/services/validators/base_validator.py
@@ -7,6 +7,7 @@
 import bittensor as bt
 
 from cortext.metaclasses import ValidatorRegistryMeta
+from validators.utils import error_handler
 
 dataset = None
 
@@ -65,6 +66,7 @@ async def get_answer_task(self, uid, synapse, response):
     async def get_scoring_task(self, uid, answer, response):
         pass
 
+    @error_handler
     async def score_responses(self, responses):
         answering_tasks = []
         scoring_tasks = []
@@ -89,12 +91,14 @@ async def score_responses(self, responses):
 
         for (uid, _), scored_response in zip(scoring_tasks, scored_responses):
             if scored_response is not None:
+                bt.logging.trace(f"scored response is {scored_response} for uid {uid}")
                 uid_scores_dict[uid] = float(scored_response)
             else:
                 uid_scores_dict[uid] = 0
 
         if uid_scores_dict != {}:
-            bt.logging.info(f"text_scores is {uid_scores_dict}")
+            validator_type = self.__class__.__name__
+            bt.logging.info(f"{validator_type} scores is {uid_scores_dict}")
         bt.logging.trace("score_responses process completed.")
 
         return uid_scores_dict, scored_response, responses
diff --git a/validators/services/validators/text_validator.py b/validators/services/validators/text_validator.py
index fe4d2d22..571619ba 100644
--- a/validators/services/validators/text_validator.py
+++ b/validators/services/validators/text_validator.py
@@ -72,9 +72,9 @@ async def organic(self, metagraph, query: dict[str, list[dict[str, str]]]) -> As
             bt.logging.trace(resp)
             yield uid, resp
 
-    async def get_question(self):
+    async def get_question(self, miner_cnt=1):
         is_vision_model = self.model in constants.VISION_MODELS
-        question = await get_question("text", 1, is_vision_model)
+        question = await get_question("text", miner_cnt, is_vision_model)
         return question
 
     async def create_query(self, uid) -> bt.Synapse:
diff --git a/validators/weight_setter.py b/validators/weight_setter.py
index 4612a5d0..45fa84ec 100644
--- a/validators/weight_setter.py
+++ b/validators/weight_setter.py
@@ -3,6 +3,8 @@
 import random
 import torch
 import traceback
+
+from collections import defaultdict
 from substrateinterface import SubstrateInterface
 from functools import partial
 from typing import Tuple
@@ -14,7 +16,7 @@
 from cortext.protocol import IsAlive, StreamPrompting, ImageResponse, Embeddings
 from cortext.metaclasses import ValidatorRegistryMeta
-from validators.services import CapacityService, BaseValidator
+from validators.services import CapacityService, BaseValidator, TextValidator, ImageValidator
 from validators.utils import handle_response
 
 scoring_organic_timeout = 60
@@ -209,19 +211,18 @@ def should_i_score(self):
 
     async def perform_queries(self, selected_validator, uids_to_query):
         query_responses = []
-        tasks = []
-        async def query_miner_per_uid(uid):
-            try:
-                query_syn = await selected_validator.create_query(uid)
-                response = await self.query_miner(uid, query_syn)
-                query_responses.append((uid, {'query': query_syn, 'response': response}))
-            except Exception as e:
-                bt.logging.error(f"Exception during query for uid {uid}: {e}")
-
+        response_tasks = []
+        query_tasks = []
         for uid in uids_to_query:
-            task = query_miner_per_uid(uid)
-            tasks.append(task)
-        await asyncio.gather(*tasks)
+            query_task = selected_validator.create_query(uid)
+            query_tasks.append(query_task)
+
+        queries = await asyncio.gather(*query_tasks)
+        for uid, query in zip(uids_to_query, queries):
+            response_tasks.append(self.query_miner(uid, query))
+
+        responses = await asyncio.gather(*response_tasks)
+        for uid, query_syn, response in zip(uids_to_query, queries, responses):
+            query_responses.append((uid, {'query': query_syn, 'response': response}))
         return query_responses
 
     @handle_response
@@ -484,25 +485,28 @@ async def process_queries_from_database(self):
                 queries_to_process = self.query_database.copy()
                 self.query_database.clear()
 
+            validator_to_query_resps = defaultdict(list)
+            type_to_validator = {}
             # Process queries outside the lock to prevent blocking
             for query_data in queries_to_process:
                 uid = query_data['uid']
                 synapse = query_data['synapse']
                 response = query_data['response']
                 validator = query_data['validator']
-
-                # Prepare query response data in the format expected by the validator
-                query_responses = [(uid, {'query': synapse, 'response': response})]
-
-                # Score the response using the validator
-                try:
-                    uid_scores_dict, _, _ = await validator.score_responses(query_responses)
-                except Exception as e:
-                    bt.logging.error(f"Error scoring response for UID {uid}: {e}")
-                    continue
-
-                # Update total_scores and score_counts
-                async with self.lock:
+                type_to_validator[type(validator)] = validator
+                validator_to_query_resps[type(validator)].append((uid, {'query': synapse, 'response': response}))
+
+            score_tasks = []
+            for vali_type in type_to_validator:
+                validator = type_to_validator[vali_type]
+                text_score_task = validator.score_responses(validator_to_query_resps[vali_type])
+                score_tasks.append(text_score_task)
+
+            resps = await asyncio.gather(*score_tasks)
+            resps = [item for item in resps if item is not None]
+            # Update total_scores and score_counts
+            async with self.lock:
+                for uid_scores_dict, _, _ in resps:
                     for uid, score in uid_scores_dict.items():
                         self.total_scores[uid] += score
                         self.score_counts[uid] += 1

From 143138d2bf818e9c152e56d837da08ac73ddeda1 Mon Sep 17 00:00:00 2001
From: acer-king
Date: Mon, 16 Sep 2024 02:47:18 -0700
Subject: [PATCH 06/80] feature-0001: add penalty based on process time of request for Image task

---
 cortext/protocol.py                        |  6 ++++
 .../services/validators/base_validator.py  |  3 +-
 .../services/validators/image_validator.py |  7 ++--
 validators/utils.py                        | 34 ++++++++++++++++++-
 4 files changed, 46 insertions(+), 4 deletions(-)

diff --git a/cortext/protocol.py b/cortext/protocol.py
index 4fec3371..f6d1065f 100644
--- a/cortext/protocol.py
+++ b/cortext/protocol.py
@@ -124,6 +124,12 @@ class ImageResponse(bt.Synapse):
         description="A list of fields required for the hash."
     )
 
+    process_time: int = pydantic.Field(
+        default=9999,
+        title="process time",
+        description="processed time of querying dendrite.",
+    )
+
     def deserialize(self) -> Optional[Dict]:
         """ Deserialize the completion data of the image response. """
         return self.completion
diff --git a/validators/services/validators/base_validator.py b/validators/services/validators/base_validator.py
index ba616e5d..ac705564 100644
--- a/validators/services/validators/base_validator.py
+++ b/validators/services/validators/base_validator.py
@@ -7,7 +7,7 @@
 import bittensor as bt
 
 from cortext.metaclasses import ValidatorRegistryMeta
-from validators.utils import error_handler
+from validators.utils import error_handler, apply_for_time_penalty_to_uid_scores
 
 dataset = None
@@ -67,6 +67,7 @@ async def get_scoring_task(self, uid, answer, response):
         pass
 
     @error_handler
+    @apply_for_time_penalty_to_uid_scores
     async def score_responses(self, responses):
diff --git a/validators/services/validators/image_validator.py b/validators/services/validators/image_validator.py
index 8b869235..08a2cbe9 100644
--- a/validators/services/validators/image_validator.py
+++ b/validators/services/validators/image_validator.py
@@ -65,16 +65,19 @@ def should_i_score(self):
         return rand < 1 / 1
 
     async def get_scoring_task(self, uid, answer, response: ImageResponse):
-        if answer is None:
+        if response is None:
+            bt.logging.trace(f"response is None. so return score with 0 for this uid {uid}.")
             return 0
         if response.provider == "OpenAI":
-            completion = answer.completion
+            completion = response.completion
             if completion is None:
+                bt.logging.trace(f"response completion is None for uid {uid}. so return score with 0")
so return score with 0") return 0 image_url = completion["url"] score = await cortext.reward.dalle_score(uid, image_url, self.size, response.messages, self.weight) else: + bt.logging.trace(f"not found provider type {response.provider}") score = 0 # cortext.reward.deterministic_score(uid, syn, self.weight) return score diff --git a/validators/utils.py b/validators/utils.py index d2d64404..fd646824 100644 --- a/validators/utils.py +++ b/validators/utils.py @@ -1,3 +1,4 @@ +import time import aiohttp import asyncio import base64 @@ -10,6 +11,8 @@ from functools import wraps import logging +from cortext import ImageResponse + async def download_image(url): try: @@ -53,11 +56,17 @@ def handle_response(func): @wraps(func) async def wrapper(*args, **kwargs): try: + start_time = time.time() response = await func(*args, **kwargs) + end_time = time.time() if inspect.isasyncgen(response): return await handle_response_stream(response) - else: + elif isinstance(response, ImageResponse): + response.process_time = end_time - start_time return response + else: + bt.logging.error(f"Not found response type: {type(response)}") + return None except Exception as err: bt.logging.exception(f"Exception during query for uid {args[1]}, {err}") return None @@ -65,6 +74,29 @@ async def wrapper(*args, **kwargs): return wrapper +def apply_for_time_penalty_to_uid_scores(func): + @wraps(func) + async def wrapper(*args, **kwargs): + uid_to_scores, scores, resps = await func(*args, **kwargs) + for uid, query_resp in resps: + resp_synapse = query_resp.get("response") + if isinstance(resp_synapse, ImageResponse): + score = uid_to_scores[uid] + factor = 64 + max_penalty = 0.5 + if resp_synapse.process_time < 5: + bt.logging.trace(f"process time is less than 5 sec. so don't apply penalty for uid {uid}") + else: + penalty = min(max_penalty * pow(resp_synapse.process_time, 1.5) / pow(factor, 1.5), max_penalty) + bt.logging.trace(f"penatly {penalty} is applied to miner {uid} " + f"for process time {resp_synapse.process_time}") + score -= penalty + uid_to_scores[uid] = max(score, 0) + return uid_to_scores, scores, resps + + return wrapper + + def get_should_i_score_arr_for_text(): for i in itertools.count(): yield (i % 3) == 0 From 548d6f5657f0148cc9af16a2205ccf7d7fa4d63a Mon Sep 17 00:00:00 2001 From: acer-king Date: Mon, 16 Sep 2024 07:08:41 -0700 Subject: [PATCH 07/80] feature-0002: add time penalty for Streaming task --- .../services/validators/text_validator.py | 3 +- validators/utils.py | 28 ++++++++++++++++--- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/validators/services/validators/text_validator.py b/validators/services/validators/text_validator.py index 210977ea..8362ac70 100644 --- a/validators/services/validators/text_validator.py +++ b/validators/services/validators/text_validator.py @@ -162,4 +162,5 @@ async def get_answer_task(self, uid: int, query_syn: StreamPrompting, response): return await self.call_api(prompt, image_url, self.provider) async def get_scoring_task(self, uid, answer, response): - return await cortext.reward.api_score(answer, response, self.weight, self.temperature, self.provider) + response_str, _ = response + return await cortext.reward.api_score(answer, response_str, self.weight, self.temperature, self.provider) diff --git a/validators/utils.py b/validators/utils.py index fd646824..027bdd81 100644 --- a/validators/utils.py +++ b/validators/utils.py @@ -58,11 +58,11 @@ async def wrapper(*args, **kwargs): try: start_time = time.time() response = await func(*args, **kwargs) - 
end_time = time.time() if inspect.isasyncgen(response): - return await handle_response_stream(response) + result = await handle_response_stream(response) + return result, time.time() - start_time elif isinstance(response, ImageResponse): - response.process_time = end_time - start_time + response.process_time = time.time() - start_time return response else: bt.logging.error(f"Not found response type: {type(response)}") @@ -81,6 +81,7 @@ async def wrapper(*args, **kwargs): for uid, query_resp in resps: resp_synapse = query_resp.get("response") if isinstance(resp_synapse, ImageResponse): + # apply penalty for image task. score = uid_to_scores[uid] factor = 64 max_penalty = 0.5 @@ -88,10 +89,29 @@ async def wrapper(*args, **kwargs): bt.logging.trace(f"process time is less than 5 sec. so don't apply penalty for uid {uid}") else: penalty = min(max_penalty * pow(resp_synapse.process_time, 1.5) / pow(factor, 1.5), max_penalty) - bt.logging.trace(f"penatly {penalty} is applied to miner {uid} " + bt.logging.trace(f"penalty {penalty} is applied to miner {uid} " f"for process time {resp_synapse.process_time}") score -= penalty uid_to_scores[uid] = max(score, 0) + elif isinstance(resp_synapse, tuple): + # apply penalty for streaming task. + resp_str, process_time = resp_synapse + total_work_done = len(resp_str) + chars_per_sec = total_work_done / process_time + bt.logging.debug(f"speed of streaming is {chars_per_sec} chars per second") + + base_speed = 50 + if chars_per_sec >= base_speed: + bt.logging.trace(f"don't apply penalty for this uid {uid}") + else: + max_penalty = 0.5 + penalty = min((base_speed - chars_per_sec) / base_speed, max_penalty) # max penalty is 0.5 + new_score = max(uid_to_scores[uid] - penalty, 0) + bt.logging.debug(f"penalty is {penalty}, new_score is {new_score} for uid {uid}") + uid_to_scores[uid] = new_score + else: + pass + return uid_to_scores, scores, resps return wrapper From 99a0194b6bc4bee22e958dc7282ee0140b2258ac Mon Sep 17 00:00:00 2001 From: acer-king Date: Mon, 16 Sep 2024 14:41:30 -0700 Subject: [PATCH 08/80] feature-0002-add-bandwidth --- .../services/validators/base_validator.py | 5 +- .../services/validators/image_validator.py | 9 ++- .../services/validators/text_validator.py | 14 ++++- validators/weight_setter.py | 63 ++++++++++++------- 4 files changed, 60 insertions(+), 31 deletions(-) diff --git a/validators/services/validators/base_validator.py b/validators/services/validators/base_validator.py index ac705564..b11ffa32 100644 --- a/validators/services/validators/base_validator.py +++ b/validators/services/validators/base_validator.py @@ -42,6 +42,10 @@ async def query_miner(self, metagraph, uid, syn): def select_random_provider_and_model(self): pass + + def get_provider_to_models(self): + pass + async def handle_response(self, uid, response) -> Tuple[int, bt.Synapse]: if type(response) == list and response: response = response[0] @@ -67,7 +71,6 @@ async def get_scoring_task(self, uid, answer, response): pass @error_handler - @apply_for_time_penalty_to_uid_scores async def score_responses(self, responses): answering_tasks = [] scoring_tasks = [] diff --git a/validators/services/validators/image_validator.py b/validators/services/validators/image_validator.py index 08a2cbe9..3afee83b 100644 --- a/validators/services/validators/image_validator.py +++ b/validators/services/validators/image_validator.py @@ -49,14 +49,17 @@ def select_random_provider_and_model(self): elif self.provider == "OpenAI": self.model = "dall-e-3" + def get_provider_to_models(self): + 
return "OpenAI", "dall-e-3" + async def get_question(self): question = await get_question("images", 1) return question - async def create_query(self, uid) -> bt.Synapse: + async def create_query(self, uid, provider=None, model=None) -> bt.Synapse: question = await self.get_question() - syn = ImageResponse(messages=question, model=self.model, size=self.size, quality=self.quality, - style=self.style, provider=self.provider, seed=self.seed, steps=self.steps) + syn = ImageResponse(messages=question, model=model, size=self.size, quality=self.quality, + style=self.style, provider=provider, seed=self.seed, steps=self.steps) bt.logging.info(f"uid = {uid}, syn = {syn}") return syn diff --git a/validators/services/validators/text_validator.py b/validators/services/validators/text_validator.py index 8362ac70..6b9b40bd 100644 --- a/validators/services/validators/text_validator.py +++ b/validators/services/validators/text_validator.py @@ -77,7 +77,7 @@ async def get_question(self, miner_cnt=1): question = await get_question("text", miner_cnt, is_vision_model) return question - async def create_query(self, uid) -> bt.Synapse: + async def create_query(self, uid, provider=None, model=None) -> bt.Synapse: question = await self.get_question() prompt = question.get("prompt") image = question.get("image") @@ -86,8 +86,8 @@ async def create_query(self, uid) -> bt.Synapse: else: messages = [{'role': 'user', 'content': prompt}] - syn = StreamPrompting(messages=messages, model=self.model, seed=self.seed, max_tokens=self.max_tokens, - temperature=self.temperature, provider=self.provider, top_p=self.top_p, + syn = StreamPrompting(messages=messages, model=model, seed=self.seed, max_tokens=self.max_tokens, + temperature=self.temperature, provider=provider, top_p=self.top_p, top_k=self.top_k) return syn @@ -101,6 +101,14 @@ def select_random_provider_and_model(self): self.model = random.choices(list(model_to_weights.keys()), weights=list(model_to_weights.values()), k=1)[0] + def get_provider_to_models(self): + provider_models = [] + for provider in constants.TEXT_VALI_MODELS_WEIGHTS: + models = constants.TEXT_VALI_MODELS_WEIGHTS.get(provider).keys() + for model_ in models: + provider_models.append((provider, model_)) + return provider_models + @classmethod def should_i_score(cls): return next(cls.gen_should_i_score) diff --git a/validators/weight_setter.py b/validators/weight_setter.py index 45fa84ec..86176344 100644 --- a/validators/weight_setter.py +++ b/validators/weight_setter.py @@ -36,9 +36,10 @@ def __init__(self, config): bt.logging.info(f"Running validator on subnet: {self.netuid} with uid: {self.my_uid}") # Scoring and querying parameters - self.MIN_SCORED_QUERIES = 10 # Minimum number of times each UID should be scored per epoch - self.scoring_percent = 0.1 # Percentage of total queries that will be scored + self.MIN_SCORED_QUERIES = 1 # Minimum number of times each UID should be scored per epoch + self.scoring_percent = 1 # Percentage of total queries that will be scored self.TOTAL_QUERIES_PER_UID = int(self.MIN_SCORED_QUERIES / self.scoring_percent) + self.max_score_cnt_per_model = 1 bt.logging.info(f"Each UID will receive {self.TOTAL_QUERIES_PER_UID} total queries, " f"with {self.MIN_SCORED_QUERIES} of them being scored.") @@ -176,7 +177,6 @@ async def perform_synthetic_queries(self): uids_to_query = uids_to_query[:num_uids_to_query] selected_validator = self.select_validator() - selected_validator.select_random_provider_and_model() # Perform synthetic queries bt.logging.info("start querying to miners") 
@@ -189,16 +189,14 @@ async def perform_synthetic_queries(self): self.total_queries_sent[uid] += 1 # Decide whether to score this query - if self.should_i_score(): - self.query_database.append({ - 'uid': uid, - 'synapse': response_data['query'], - 'response': response_data['response'], - 'query_type': 'synthetic', - 'timestamp': asyncio.get_event_loop().time(), - 'validator': selected_validator - }) - # If not scoring, we can still log the query if needed + self.query_database.append({ + 'uid': uid, + 'synapse': response_data['query'], + 'response': response_data['response'], + 'query_type': 'synthetic', + 'timestamp': asyncio.get_event_loop().time(), + 'validator': selected_validator + }) bt.logging.info(f"Performed synthetic queries for UIDs: {uids_to_query}") @@ -213,16 +211,23 @@ async def perform_queries(self, selected_validator, uids_to_query): query_responses = [] response_tasks = [] query_tasks = [] - for uid in uids_to_query: - query_task = selected_validator.create_query(uid) - query_tasks.append(query_task) - queries = await asyncio.gather(*query_tasks) - for uid, query in zip(uids_to_query, queries): - response_tasks.append(self.query_miner(uid, query)) - - responses = await asyncio.gather(*response_tasks) - for uid, query_syn, response in zip(uids_to_query, queries, responses): - query_responses.append((uid, {'query': query_syn, 'response': response})) + provider_to_models = selected_validator.get_provider_to_models() + uids_to_query_expand = [] + for provider, model in provider_to_models: + for uid in uids_to_query: + band_width = self.uid_to_capacity.get(provider).get(model) + for _ in range(band_width): + query_task = selected_validator.create_query(uid, provider, model) + query_tasks.append(query_task) + uids_to_query_expand.append(uid) + + queries = await asyncio.gather(*query_tasks) + for uid, query in zip(uids_to_query_expand, queries): + response_tasks.append(self.query_miner(uid, query)) + + responses = await asyncio.gather(*response_tasks) + for uid, query_syn, response in zip(uids_to_query_expand, queries, responses): + query_responses.append((uid, {'query': query_syn, 'response': response})) return query_responses @handle_response @@ -484,6 +489,7 @@ async def process_queries_from_database(self): queries_to_process = self.query_database.copy() self.query_database.clear() + grouped_query_resps = defaultdict(list) validator_to_query_resps = defaultdict(list) type_to_validator = {} # Process queries outside the lock to prevent blocking @@ -493,7 +499,16 @@ async def process_queries_from_database(self): response = query_data['response'] validator = query_data['validator'] type_to_validator[type(validator)] = validator - validator_to_query_resps[type(validator)].append((uid, {'query': synapse, 'response': response})) + provider = synapse.provider + model = synapse.model + + grouped_key = f"{type(validator)}:{uid}:{provider}:{model}" + if len(grouped_query_resps[grouped_key]) < self.max_score_cnt_per_model: + grouped_query_resps[grouped_key].append( + (uid, {'query': synapse, 'response': response})) + validator_to_query_resps[type(validator)].append( + (uid, {'query': synapse, 'response': response}) + ) score_tasks = [] for vali_type in type_to_validator: From 511728afcd657c42a1078b6f65fd60c2080f35fe Mon Sep 17 00:00:00 2001 From: acer-king Date: Tue, 17 Sep 2024 04:34:09 -0700 Subject: [PATCH 09/80] send queries based on bandwidth and score only one of them per each uid --- miner/providers/anthropicbedrock.py | 32 ++++--- .../services/validators/base_validator.py | 6 +- 
 .../validators/embeddings_validator.py        |  6 +-
 .../services/validators/image_validator.py    |  6 +-
 .../services/validators/text_validator.py     |  4 +
 validators/weight_setter.py                   | 91 ++++++++++---------
 6 files changed, 85 insertions(+), 60 deletions(-)

diff --git a/miner/providers/anthropicbedrock.py b/miner/providers/anthropicbedrock.py
index b1de716d..e9e25afc 100644
--- a/miner/providers/anthropicbedrock.py
+++ b/miner/providers/anthropicbedrock.py
@@ -11,10 +11,9 @@ class AnthropicBedrock(Provider):
     def __init__(self, synapse):
         super().__init__(synapse)
         bedrock_client_parameters = {
-            "service_name": 'bedrock-runtime',
-            "aws_access_key_id": config.AWS_ACCESS_KEY,
-            "aws_secret_access_key": config.AWS_SECRET_KEY,
-            "region_name": "us-east-1"
+            "aws_access_key": config.AWS_ACCESS_KEY,
+            "aws_secret_key": config.AWS_SECRET_KEY,
+            "aws_region": "us-east-1"
         }

         self.anthropic_bedrock_client = AsyncAnthropicBedrock(timeout=config.ASYNC_TIME_OUT,
@@ -22,15 +21,22 @@ def __init__(self, synapse):

     @error_handler
     async def _prompt(self, synapse: StreamPrompting, send: Send):
-        stream = await self.anthropic_bedrock_client.completions.create(
-            prompt=f"\n\nHuman: {self.messages}\n\nAssistant:",
-            max_tokens_to_sample=self.max_tokens,
-            temperature=self.temperature,  # must be <= 1.0
-            top_k=self.top_k,
-            top_p=self.top_p,
-            model=self.model,
-            stream=True,
-        )
+        stream = []
+        try:
+            stream = await self.anthropic_bedrock_client.completions.create(
+                prompt=f"\n\nHuman: {self.messages}\n\nAssistant:",
+                max_tokens_to_sample=self.max_tokens,
+                temperature=self.temperature,  # must be <= 1.0
+                top_k=self.top_k,
+                top_p=self.top_p,
+                model=self.model,
+                stream=True,
+            )
+        except Exception as err:
+            bt.logging.exception(err)
+            bt.logging.error(f"AnthropicBedrock completion stream failed to start: {err}")
+            await send({"type": "http.response.body", "body": b'', "more_body": False})
+

         async for completion in stream:
             if completion.completion:
diff --git a/validators/services/validators/base_validator.py b/validators/services/validators/base_validator.py
index b11ffa32..c8f03c2a 100644
--- a/validators/services/validators/base_validator.py
+++ b/validators/services/validators/base_validator.py
@@ -95,7 +95,7 @@ async def score_responses(self, responses):

         for (uid, _), scored_response in zip(scoring_tasks, scored_responses):
             if scored_response is not None:
-                bt.logging.trace(f"scored response is None for uid {uid}")
+                bt.logging.trace(f"scored response is {scored_response} for uid {uid}")
                 uid_scores_dict[uid] = float(scored_response)
             else:
                 uid_scores_dict[uid] = 0
@@ -112,3 +112,7 @@ async def get_and_score(self, available_uids: List[int]):
         query_responses = await self.start_query(available_uids)
         bt.logging.trace("scoring query with query responses")
         return await self.score_responses(query_responses)
+
+    @classmethod
+    def get_task_type(cls):
+        pass
\ No newline at end of file
diff --git a/validators/services/validators/embeddings_validator.py b/validators/services/validators/embeddings_validator.py
index 53d89260..02390a56 100644
--- a/validators/services/validators/embeddings_validator.py
+++ b/validators/services/validators/embeddings_validator.py
@@ -8,9 +8,9 @@
 from validators.services.validators.base_validator import BaseValidator


-class EmbeddingsValidator(BaseValidator):
-    def __init__(self, config):
-        super().__init__(config)
+class EmbeddingsValidator:
+    def __init__(self, config, metagraph=None):
+        super().__init__(config, metagraph)
         self.streaming = False
         self.config = config
         self.query_type = "embeddings"
diff --git
a/validators/services/validators/image_validator.py b/validators/services/validators/image_validator.py index 3afee83b..54d91cff 100644 --- a/validators/services/validators/image_validator.py +++ b/validators/services/validators/image_validator.py @@ -50,7 +50,7 @@ def select_random_provider_and_model(self): self.model = "dall-e-3" def get_provider_to_models(self): - return "OpenAI", "dall-e-3" + return [("OpenAI", "dall-e-3")] async def get_question(self): question = await get_question("images", 1) @@ -109,3 +109,7 @@ async def build_wandb_data(self, scores, responses): self.wandb_data["images"][uid] = wandb.Image(image) if image is not None else '' self.wandb_data["prompts"][uid] = self.uid_to_questions[uid] return self.wandb_data + + @classmethod + def get_task_type(cls): + return ImageResponse.__name__ diff --git a/validators/services/validators/text_validator.py b/validators/services/validators/text_validator.py index 6b9b40bd..e8150023 100644 --- a/validators/services/validators/text_validator.py +++ b/validators/services/validators/text_validator.py @@ -172,3 +172,7 @@ async def get_answer_task(self, uid: int, query_syn: StreamPrompting, response): async def get_scoring_task(self, uid, answer, response): response_str, _ = response return await cortext.reward.api_score(answer, response_str, self.weight, self.temperature, self.provider) + + @classmethod + def get_task_type(cls): + return StreamPrompting.__name__ diff --git a/validators/weight_setter.py b/validators/weight_setter.py index 86176344..a457d08a 100644 --- a/validators/weight_setter.py +++ b/validators/weight_setter.py @@ -39,7 +39,7 @@ def __init__(self, config): self.MIN_SCORED_QUERIES = 1 # Minimum number of times each UID should be scored per epoch self.scoring_percent = 1 # Percentage of total queries that will be scored self.TOTAL_QUERIES_PER_UID = int(self.MIN_SCORED_QUERIES / self.scoring_percent) - self.max_score_cnt_per_model = 1 + self.max_score_cnt_per_uid = 1 bt.logging.info(f"Each UID will receive {self.TOTAL_QUERIES_PER_UID} total queries, " f"with {self.MIN_SCORED_QUERIES} of them being scored.") @@ -176,30 +176,25 @@ async def perform_synthetic_queries(self): num_uids_to_query = min(self.config.max_miners_cnt, len(uids_to_query)) uids_to_query = uids_to_query[:num_uids_to_query] - selected_validator = self.select_validator() - - # Perform synthetic queries - bt.logging.info("start querying to miners") - query_responses = await self.perform_queries(selected_validator, uids_to_query) - - # Store queries and responses in the shared database - async with self.lock: - for uid, response_data in query_responses: - # Update total_queries_sent - self.total_queries_sent[uid] += 1 - - # Decide whether to score this query - self.query_database.append({ - 'uid': uid, - 'synapse': response_data['query'], - 'response': response_data['response'], - 'query_type': 'synthetic', - 'timestamp': asyncio.get_event_loop().time(), - 'validator': selected_validator - }) - + for selected_validator in self.get_validators(): + # Perform synthetic queries + bt.logging.info("start querying to miners") + query_responses = await self.perform_queries(selected_validator, uids_to_query) + # Store queries and responses in the shared database + async with self.lock: + for uid, response_data in query_responses: + # Decide whether to score this query + self.query_database.append({ + 'uid': uid, + 'synapse': response_data['query'], + 'response': response_data['response'], + 'query_type': 'synthetic', + 'timestamp': asyncio.get_event_loop().time(), 
+ 'validator': selected_validator + }) + await asyncio.sleep(1) + self.total_queries_sent[uid] += 1 bt.logging.info(f"Performed synthetic queries for UIDs: {uids_to_query}") - # Slow down the validator steps if necessary await asyncio.sleep(1) @@ -212,22 +207,24 @@ async def perform_queries(self, selected_validator, uids_to_query): response_tasks = [] query_tasks = [] provider_to_models = selected_validator.get_provider_to_models() + synapse_type = selected_validator.get_task_type() + uids_to_query_expand = [] for provider, model in provider_to_models: for uid in uids_to_query: - band_width = self.uid_to_capacity.get(provider).get(model) + band_width = self.uid_to_capacity.get(uid).bandwidth_rpm.get(f"{synapse_type}_{provider}") for _ in range(band_width): query_task = selected_validator.create_query(uid, provider, model) query_tasks.append(query_task) uids_to_query_expand.append(uid) - queries = await asyncio.gather(*query_tasks) - for uid, query in zip(uids_to_query_expand, queries): - response_tasks.append(self.query_miner(uid, query)) + queries = await asyncio.gather(*query_tasks) + for uid, query in zip(uids_to_query_expand, queries): + response_tasks.append(self.query_miner(uid, query)) - responses = await asyncio.gather(*response_tasks) - for uid, query_syn, response in zip(uids_to_query_expand, queries, responses): - query_responses.append((uid, {'query': query_syn, 'response': response})) + responses = await asyncio.gather(*response_tasks) + for uid, query_syn, response in zip(uids_to_query_expand, queries, responses): + query_responses.append((uid, {'query': query_syn, 'response': response})) return query_responses @handle_response @@ -258,6 +255,14 @@ def select_validator(self) -> BaseValidator: else: return image_validator + def get_validators(self): + validators = [] + all_classes = ValidatorRegistryMeta.all_classes() + for class_name, class_ref in all_classes.items(): + validator = ValidatorRegistryMeta.get_class(class_name)(config=self.config, metagraph=self.metagraph) + validators.append(validator) + return validators + async def get_capacities_for_uids(self, uids): capacity_service = CapacityService(metagraph=self.metagraph, dendrite=self.dendrite) uid_to_capacity = await capacity_service.query_capacity_to_miners(uids) @@ -498,17 +503,19 @@ async def process_queries_from_database(self): synapse = query_data['synapse'] response = query_data['response'] validator = query_data['validator'] - type_to_validator[type(validator)] = validator - provider = synapse.provider - model = synapse.model - - grouped_key = f"{type(validator)}:{uid}:{provider}:{model}" - if len(grouped_query_resps[grouped_key]) < self.max_score_cnt_per_model: - grouped_query_resps[grouped_key].append( - (uid, {'query': synapse, 'response': response})) - validator_to_query_resps[type(validator)].append( - (uid, {'query': synapse, 'response': response}) - ) + vali_type = type(validator).__name__ + type_to_validator[vali_type] = validator + + grouped_key = f"{vali_type}:{uid}" + grouped_query_resps[grouped_key].append( + (uid, {'query': synapse, 'response': response})) + + for key, uid_to_query_resps in grouped_query_resps.items(): + vali_type = str(key).split(":")[0] + if not uid_to_query_resps: + continue + query_resp_to_score_for_uid = random.choice(uid_to_query_resps) + validator_to_query_resps[vali_type].append(query_resp_to_score_for_uid) score_tasks = [] for vali_type in type_to_validator: From bf891a0064be006df5ba4ebe052f3a12feb39c19 Mon Sep 17 00:00:00 2001 From: acer-king Date: Tue, 17 Sep 2024 
12:53:08 -0700 Subject: [PATCH 10/80] update bandwidth and calculate score per miner and model once per epoch. calculate scoring based on bandwidth and weights of models --- cortext/constants.py | 89 +++++++++++++++++++ cortext/protocol.py | 2 +- miner/constants.py | 28 ------ miner/services/capacity.py | 4 +- .../services/validators/base_validator.py | 86 +++++++++++++----- validators/services/validators/constants.py | 48 ---------- .../services/validators/image_validator.py | 2 +- .../services/validators/text_validator.py | 3 +- validators/weight_setter.py | 25 +++--- 9 files changed, 170 insertions(+), 117 deletions(-) create mode 100644 cortext/constants.py delete mode 100644 miner/constants.py delete mode 100644 validators/services/validators/constants.py diff --git a/cortext/constants.py b/cortext/constants.py new file mode 100644 index 00000000..9f71ff82 --- /dev/null +++ b/cortext/constants.py @@ -0,0 +1,89 @@ +TEXT_MODEL = "gpt-4-turbo-2024-04-09" +TEXT_PROVIDER = "OpenAI" +TEXT_MAX_TOKENS = 4096 +TEXT_TEMPERATURE = 0.001 +TEXT_WEIGHT = 1 +TEXT_TOP_P = 0.01 +TEXT_TOP_K = 1 +VISION_MODELS = ["gpt-4o", "claude-3-opus-20240229", "anthropic.claude-3-sonnet-20240229-v1:0", + "claude-3-5-sonnet-20240620"] +TEXT_VALI_MODELS_WEIGHTS = { + # from https://openai.com/api/pricing/ + "OpenAI": { + "gpt-4o": 15.00, + "gpt-4o-mini": 0.600, + "gpt-3.5-turbo": 2.00, + "o1-preview": 60.00, + "o1-mini": 12.00, + }, + # from https://ai.google.dev/pricing + "Gemini": { + "gemini-1.5-flash": 0.30, + "gemini-1.5-pro": 10.50, + }, + # + "Anthropic": { + "claude-3-5-sonnet-20240620": 15.00, + # "claude-3-opus-20240229": 75, + "claude-3-haiku-20240307": 1.25, + }, + # model IDs from https://console.groq.com/docs/tool-use?hss_channel=tw-842860575289819136 + # prices not available yet, default to bedrock pricing + # free tier: 30 rpm + "Groq": { + "gemma2-9b-it": 0.22, + "llama-3.1-8b-instant": 0.22, + "llama-3.1-70b-versatile": .99, + # "llama-3.1-405b-reasoning": 16, + "mixtral-8x7b-32768": 0.7, + }, + # from https://aws.amazon.com/bedrock/pricing/ + # model IDs from https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html#model-ids-arns + "Bedrock": { + "mistral.mixtral-8x7b-instruct-v0:1": 0.7, + "mistral.mistral-large-2402-v1:0": 24, + "meta.llama3-1-8b-instruct-v1:0": 0.22, + "meta.llama3-1-70b-instruct-v1:0": 0.99, + # "meta.llama3-1-405b-instruct-v1:0": 16, + } +} + +bandwidth_to_model = { + "OpenAI": { + "gpt-4o": 1, + "gpt-4o-mini": 1, + "gpt-3.5-turbo": 1, + "o1-preview": 1, + "o1-mini": 1, + }, + # from https://ai.google.dev/pricing + "Gemini": { + "gemini-1.5-flash": 1, + "gemini-1.5-pro": 1, + }, + # + "Anthropic": { + "claude-3-5-sonnet-20240620": 1, + # "claude-3-opus-20240229": 1, + "claude-3-haiku-20240307": 1, + }, + # model IDs from https://console.groq.com/docs/tool-use?hss_channel=tw-842860575289819136 + # prices not available yet, default to bedrock pricing + # free tier: 30 rpm + "Groq": { + "gemma2-9b-it": 1, + "llama-3.1-8b-instant": 1, + "llama-3.1-70b-versatile": 1, + # "llama-3.1-405b-reasoning": 16, + "mixtral-8x7b-32768": 1, + }, + # from https://aws.amazon.com/bedrock/pricing/ + # model IDs from https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html#model-ids-arns + "Bedrock": { + "mistral.mixtral-8x7b-instruct-v0:1": 1, + "mistral.mistral-large-2402-v1:0": 1, + "meta.llama3-1-8b-instruct-v1:0": 1, + "meta.llama3-1-70b-instruct-v1:0": 1, + # "meta.llama3-1-405b-instruct-v1:0": 16, + } +} diff --git a/cortext/protocol.py b/cortext/protocol.py index 
f6d1065f..27fa3b02 100644 --- a/cortext/protocol.py +++ b/cortext/protocol.py @@ -15,7 +15,7 @@ class IsAlive(bt.Synapse): class Bandwidth(bt.Synapse): - bandwidth_rpm: Optional[Dict[str, int]] = None + bandwidth_rpm: Optional[Dict[str, dict]] = None class ImageResponse(bt.Synapse): """ A class to represent the response for an image-related request. """ diff --git a/miner/constants.py b/miner/constants.py deleted file mode 100644 index ef7371a5..00000000 --- a/miner/constants.py +++ /dev/null @@ -1,28 +0,0 @@ -from cortext import ImageResponse, StreamPrompting -from miner.providers import OpenAI, Anthropic, AnthropicBedrock, Groq, Gemini, Bedrock - -task_image = ImageResponse.__name__ -task_stream = StreamPrompting.__name__ - -openai_provider = OpenAI.__name__ -anthropic_provider = Anthropic.__name__ -anthropic_bedrock_provider = AnthropicBedrock.__name__ -groq_provider = Groq.__name__ -gemini_provider = Gemini.__name__ -bedrock_provider = Bedrock.__name__ - -capacity_to_task_and_provider = { - f"{task_image}_{openai_provider}": 1, - f"{task_image}_{anthropic_provider}": 1, - f"{task_image}_{anthropic_bedrock_provider}": 1, - f"{task_image}_{groq_provider}": 1, - f"{task_image}_{gemini_provider}": 1, - f"{task_image}_{bedrock_provider}": 1, - - f"{task_stream}_{openai_provider}": 1, - f"{task_stream}_{anthropic_provider}": 1, - f"{task_stream}_{anthropic_bedrock_provider}": 1, - f"{task_stream}_{groq_provider}": 1, - f"{task_stream}_{gemini_provider}": 1, - f"{task_stream}_{bedrock_provider}": 1, -} diff --git a/miner/services/capacity.py b/miner/services/capacity.py index eab539b9..d64b0c48 100644 --- a/miner/services/capacity.py +++ b/miner/services/capacity.py @@ -5,7 +5,7 @@ from .base import BaseService from cortext import ISALIVE_BLACKLIST_STAKE -from miner.constants import capacity_to_task_and_provider +from cortext.constants import bandwidth_to_model class CapacityService(BaseService): @@ -14,7 +14,7 @@ def __init__(self, metagraph, blacklist_amt=ISALIVE_BLACKLIST_STAKE): async def forward_fn(self, synapse: Bandwidth): bt.logging.debug("capacity request is being processed") - synapse.bandwidth_rpm = capacity_to_task_and_provider + synapse.bandwidth_rpm = bandwidth_to_model bt.logging.info("check status is executed.") return synapse diff --git a/validators/services/validators/base_validator.py b/validators/services/validators/base_validator.py index c8f03c2a..4d7492b9 100644 --- a/validators/services/validators/base_validator.py +++ b/validators/services/validators/base_validator.py @@ -1,13 +1,15 @@ from abc import abstractmethod import asyncio -from datasets import load_dataset +from collections import defaultdict + import random -from typing import List, Tuple +from typing import Tuple import bittensor as bt from cortext.metaclasses import ValidatorRegistryMeta -from validators.utils import error_handler, apply_for_time_penalty_to_uid_scores +from validators.utils import error_handler +from cortext.constants import TEXT_VALI_MODELS_WEIGHTS dataset = None @@ -42,7 +44,6 @@ async def query_miner(self, metagraph, uid, syn): def select_random_provider_and_model(self): pass - def get_provider_to_models(self): pass @@ -71,19 +72,18 @@ async def get_scoring_task(self, uid, answer, response): pass @error_handler - async def score_responses(self, responses): + async def score_responses(self, uid_to_query_resps, uid_to_capacity): answering_tasks = [] scoring_tasks = [] - uid_scores_dict = {} scored_response = [] - for uid, query_resp in responses: + for uid, query_resp in 
uid_to_query_resps: task = self.get_answer_task(uid, query_resp.get("query"), query_resp.get("response")) answering_tasks.append((uid, task)) answers_results = await asyncio.gather(*[task for _, task in answering_tasks]) - for (uid, query_resp), answer in zip(responses, answers_results): + for (uid, query_resp), answer in zip(uid_to_query_resps, answers_results): task = self.get_scoring_task(uid, answer, query_resp.get("response")) scoring_tasks.append((uid, task)) @@ -93,26 +93,64 @@ async def score_responses(self, responses): scored_responses) if scored_responses else 0 bt.logging.debug(f"scored responses = {scored_responses}, average score = {average_score}") - for (uid, _), scored_response in zip(scoring_tasks, scored_responses): - if scored_response is not None: - bt.logging.trace(f"scored response is {scored_response} for uid {uid}") - uid_scores_dict[uid] = float(scored_response) - else: - uid_scores_dict[uid] = 0 + uid_scores_dict = self.get_uid_to_scores_dict(uid_to_query_resps, scored_responses, uid_to_capacity) - if uid_scores_dict != {}: - validator_type = self.__class__.__name__ - bt.logging.info(f"{validator_type} scores is {uid_scores_dict}") bt.logging.trace("score_responses process completed.") - return uid_scores_dict, scored_response, responses + return uid_scores_dict, scored_response, uid_to_query_resps - async def get_and_score(self, available_uids: List[int]): - bt.logging.trace("starting query") - query_responses = await self.start_query(available_uids) - bt.logging.trace("scoring query with query responses") - return await self.score_responses(query_responses) + def get_uid_to_scores_dict(self, uid_to_query_resps, scored_responses: tuple[float], uid_to_capacity): + uid_provider_model_scores_dict = defaultdict(list) + + # collect all scores per each uid, provider, model + for (uid, query_resp), scored_response in zip(uid_to_query_resps, scored_responses): + synapse = query_resp.get('query') + provider = synapse.provider + model = synapse.model + if scored_response is not None: + bt.logging.trace(f"scored response is {scored_response} for uid {uid} for provider {provider} " + f"and for model {model}") + uid_provider_model_scores_dict[f"{uid}::{provider}::{model}"].append(float(scored_response)) + else: + uid_provider_model_scores_dict[f"{uid}::{provider}::{model}"].append(0) + + # get avg score value for each uid, provider, model + uid_provider_model_scores_avg_dict = {} + for key, scores in uid_provider_model_scores_dict.items(): + if len(scores) == 0: + bt.logging.debug(f"no scores found for this uid {key}") + avg_score = sum(scores) / len(scores) + uid_provider_model_scores_avg_dict[key] = avg_score + + # total_weights = 0 + # for provider, model_infos in TEXT_VALI_MODELS_WEIGHTS.items(): + # for model in model_infos: + # total_weights += model_infos.get(model) + + # apply weight for each model and calculate score based on weight of models. 
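The step after the comment above combines three factors for each (uid, provider, model) key: the averaged answer score, the model's price-derived weight from TEXT_VALI_MODELS_WEIGHTS, and the miner's advertised bandwidth for that model. A self-contained sketch of that aggregation, assuming the gpt-4o and gpt-4o-mini weights from the constants file added in this patch and purely illustrative scores:

```python
from collections import defaultdict

# Sketch of the per-UID weighted aggregation: for each (uid, provider, model)
# key, avg_score * model_weight * bandwidth is accumulated into the UID total.
MODEL_WEIGHTS = {"OpenAI": {"gpt-4o": 15.00, "gpt-4o-mini": 0.600}}
BANDWIDTH = {5: {"OpenAI": {"gpt-4o": 1, "gpt-4o-mini": 1}}}  # illustrative

avg_scores = {
    (5, "OpenAI", "gpt-4o"): 0.9,
    (5, "OpenAI", "gpt-4o-mini"): 1.0,
}

uid_scores = defaultdict(float)
for (uid, provider, model), avg in avg_scores.items():
    weight = MODEL_WEIGHTS[provider][model]
    band_width = BANDWIDTH[uid][provider][model]
    uid_scores[uid] += avg * weight * band_width

# 0.9 * 15.00 * 1 + 1.0 * 0.600 * 1 == 14.1
assert abs(uid_scores[5] - 14.1) < 1e-9
```

Note that with this scheme a miner's total is dominated by expensive models: a weight of 15.00 contributes twenty-five times more per point of score than a weight of 0.600.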
+ uid_scores_dict = defaultdict(float) + for key, avg_score in uid_provider_model_scores_avg_dict.items(): + uid = int(str(key).split("::")[0]) + provider = str(key).split("::")[1] + model = str(key).split("::")[2] + model_weight = TEXT_VALI_MODELS_WEIGHTS.get(provider).get(model) + if model_weight is None: + bt.logging.debug(f"not weight found for this provider {provider} and model {model}") + model_weight = 0 + band_width = uid_to_capacity.get(uid).bandwidth_rpm.get(f"{provider}").get(f"{model}") + if band_width is None: + bt.logging.debug(f"no band_width found for this uid {uid}") + band_width = 1 + bt.logging.debug(f"bandwidth is {band_width}") + uid_scores_dict[uid] += avg_score * model_weight * band_width + bt.logging.debug(f"score {avg_score} for this model {model}, " + f"and weighted_score is {uid_scores_dict[uid]}") + + if not len(uid_scores_dict): + validator_type = self.__class__.__name__ + bt.logging.debug(f"{validator_type} scores is {uid_scores_dict}") + return uid_scores_dict @classmethod def get_task_type(cls): - pass \ No newline at end of file + pass diff --git a/validators/services/validators/constants.py b/validators/services/validators/constants.py deleted file mode 100644 index fb751637..00000000 --- a/validators/services/validators/constants.py +++ /dev/null @@ -1,48 +0,0 @@ -TEXT_MODEL = "gpt-4-turbo-2024-04-09" -TEXT_PROVIDER = "OpenAI" -TEXT_MAX_TOKENS = 4096 -TEXT_TEMPERATURE = 0.001 -TEXT_WEIGHT = 1 -TEXT_TOP_P = 0.01 -TEXT_TOP_K = 1 -VISION_MODELS = ["gpt-4o", "claude-3-opus-20240229", "anthropic.claude-3-sonnet-20240229-v1:0", - "claude-3-5-sonnet-20240620"] -TEXT_VALI_MODELS_WEIGHTS = { - "AnthropicBedrock": { - "anthropic.claude-v2:1": 1 - }, - "OpenAI": { - "gpt-4o": 1, - "gpt-3.5-turbo": 1000, - "o1-preview": 1, - "o1-mini": 1, - }, - "Gemini": { - "gemini-pro": 1, - "gemini-1.5-flash": 1, - "gemini-1.5-pro": 1, - }, - "Anthropic": { - "claude-3-5-sonnet-20240620": 1, - "claude-3-opus-20240229": 1, - "claude-3-sonnet-20240229": 1, - "claude-3-haiku-20240307": 1000, - }, - "Groq": { - "gemma-7b-it": 500, - "llama3-70b-8192": 1, - "llama3-8b-8192": 500, - "mixtral-8x7b-32768": 1, - }, - "Bedrock": { - # "anthropic.claude-3-sonnet-20240229-v1:0": 1, - "cohere.command-r-v1:0": 1, - # "meta.llama2-70b-chat-v1": 1, - # "amazon.titan-text-express-v1": 1, - "mistral.mistral-7b-instruct-v0:2": 1, - "ai21.j2-mid-v1": 1, - # "anthropic.claude-3-5-sonnet-20240620-v1:0": 1, - # "anthropic.claude-3-opus-20240229-v1:0": 1, - # "anthropic.claude-3-haiku-20240307-v1:0": 1 - } -} diff --git a/validators/services/validators/image_validator.py b/validators/services/validators/image_validator.py index 54d91cff..270a9394 100644 --- a/validators/services/validators/image_validator.py +++ b/validators/services/validators/image_validator.py @@ -12,7 +12,7 @@ import bittensor as bt -class ImageValidator(BaseValidator): +class ImageValidator: def __init__(self, config, metagraph=None): super().__init__(config, metagraph) self.num_uids_to_pick = 30 diff --git a/validators/services/validators/text_validator.py b/validators/services/validators/text_validator.py index e8150023..02d61843 100644 --- a/validators/services/validators/text_validator.py +++ b/validators/services/validators/text_validator.py @@ -1,10 +1,9 @@ -import asyncio import random import bittensor as bt from typing import AsyncIterator from cortext.reward import model -from . 
import constants +from cortext import constants import cortext.reward from validators.services.validators.base_validator import BaseValidator from validators.utils import error_handler diff --git a/validators/weight_setter.py b/validators/weight_setter.py index a457d08a..54278e66 100644 --- a/validators/weight_setter.py +++ b/validators/weight_setter.py @@ -16,7 +16,7 @@ from cortext.protocol import IsAlive, StreamPrompting, ImageResponse, Embeddings from cortext.metaclasses import ValidatorRegistryMeta -from validators.services import CapacityService, BaseValidator, TextValidator, ImageValidator +from validators.services import CapacityService, BaseValidator from validators.utils import handle_response scoring_organic_timeout = 60 @@ -24,7 +24,8 @@ class WeightSetter: def __init__(self, config): - self.available_uids = None + self.uid_to_capacity = {} + self.available_uids = [] bt.logging.info("Initializing WeightSetter") self.config = config self.wallet = config.wallet @@ -39,7 +40,7 @@ def __init__(self, config): self.MIN_SCORED_QUERIES = 1 # Minimum number of times each UID should be scored per epoch self.scoring_percent = 1 # Percentage of total queries that will be scored self.TOTAL_QUERIES_PER_UID = int(self.MIN_SCORED_QUERIES / self.scoring_percent) - self.max_score_cnt_per_uid = 1 + self.max_score_cnt_per_model = 1 bt.logging.info(f"Each UID will receive {self.TOTAL_QUERIES_PER_UID} total queries, " f"with {self.MIN_SCORED_QUERIES} of them being scored.") @@ -148,7 +149,7 @@ async def update_and_refresh(self, last_update): async def perform_synthetic_queries(self): while True: - if self.available_uids is None: + if not self.available_uids: await self.initialize_uids_and_capacities() current_block = self.get_current_block() @@ -166,7 +167,7 @@ async def perform_synthetic_queries(self): if not uids_to_query: bt.logging.info("All UIDs have received the maximum number of total queries.") - await asyncio.sleep(1) + await asyncio.sleep(10) continue # Prioritize UIDs with least total_queries_sent @@ -207,12 +208,11 @@ async def perform_queries(self, selected_validator, uids_to_query): response_tasks = [] query_tasks = [] provider_to_models = selected_validator.get_provider_to_models() - synapse_type = selected_validator.get_task_type() uids_to_query_expand = [] for provider, model in provider_to_models: for uid in uids_to_query: - band_width = self.uid_to_capacity.get(uid).bandwidth_rpm.get(f"{synapse_type}_{provider}") + band_width = self.uid_to_capacity.get(uid).bandwidth_rpm.get(f"{provider}").get(f"{model}") for _ in range(band_width): query_task = selected_validator.create_query(uid, provider, model) query_tasks.append(query_task) @@ -506,7 +506,10 @@ async def process_queries_from_database(self): vali_type = type(validator).__name__ type_to_validator[vali_type] = validator - grouped_key = f"{vali_type}:{uid}" + provider = synapse.provider + model = synapse.model + + grouped_key = f"{vali_type}:{uid}:{provider}:{model}" grouped_query_resps[grouped_key].append( (uid, {'query': synapse, 'response': response})) @@ -514,13 +517,13 @@ async def process_queries_from_database(self): vali_type = str(key).split(":")[0] if not uid_to_query_resps: continue - query_resp_to_score_for_uid = random.choice(uid_to_query_resps) - validator_to_query_resps[vali_type].append(query_resp_to_score_for_uid) + query_resp_to_score_for_uids = random.choices(uid_to_query_resps, k=self.max_score_cnt_per_model) + validator_to_query_resps[vali_type] += query_resp_to_score_for_uids score_tasks = [] for vali_type in 
type_to_validator: validator = type_to_validator[vali_type] - text_score_task = validator.score_responses(validator_to_query_resps[vali_type]) + text_score_task = validator.score_responses(validator_to_query_resps[vali_type], self.uid_to_capacity) score_tasks.append(text_score_task) resps = await asyncio.gather(*score_tasks) From 61936377421e4b8c0feeeadabb23de111315ffde Mon Sep 17 00:00:00 2001 From: acer-king Date: Wed, 18 Sep 2024 03:17:17 -0700 Subject: [PATCH 11/80] adding cache based scoring functionality --- validators/services/cache.py | 60 ++++++++++++++++++++++++ validators/validator.py | 6 ++- validators/weight_setter.py | 89 ++++++++++++++++++++++++++++++------ 3 files changed, 140 insertions(+), 15 deletions(-) create mode 100644 validators/services/cache.py diff --git a/validators/services/cache.py b/validators/services/cache.py new file mode 100644 index 00000000..9dab56fc --- /dev/null +++ b/validators/services/cache.py @@ -0,0 +1,60 @@ +import sqlite3 +import time +import hashlib + + +class QueryResponseCache: + def __int__(self): + # Connect to (or create) the SQLite database + conn = sqlite3.connect('cache.db') + cursor = conn.cursor() + + # Create a table for caching (key, value, and expiry time) + cursor.execute(''' + CREATE TABLE IF NOT EXISTS cache ( + p_key TEXT PRIMARY KEY + question TEXT, + answer TEXT, + provider TEXT, + model TEXT, + timestamp REAL + ) + ''') + cursor.execute(''' + CREATE INDEX IF NOT EXISTS idx_provider_model ON cache (provider, model); + ''') + conn.commit() + self.conn = conn + + @staticmethod + def generate_hash(input_string): + return hashlib.sha256(input_string.encode('utf-8')).hexdigest() + + def set_cache(self, question, answer, provider, model, ttl=3600*24): + p_key = self.generate_hash(str(question) + str(provider) + str(model)) + expires_at = time.time() + ttl + cursor = self.conn.cursor() + cursor.execute(''' + INSERT OR REPLACE INTO cache (p_key, question, answer, provider, model, expires_at) + VALUES (?, ?, ?, ?, ?, ?) + ''', (p_key, question, answer, provider, model, expires_at)) + self.conn.commit() + + def get_cache(self, key): + cursor = self.conn.cursor() + cursor.execute(''' + SELECT value FROM cache WHERE p_key = ? AND expires_at > ? + ''', (key, time.time())) + result = cursor.fetchone() + return result[0] if result else None + + def get_all_question_to_answers(self, provider, model): + cursor = self.conn.cursor() + cursor.execute(''' + SELECT value FROM cache WHERE provider = ? AND model = ? AND expires_at > ? 
+ ''', (provider, model, time.time())) + result = cursor.fetchall() + return list(result) if result else None + + def close(self): + self.conn.close() diff --git a/validators/validator.py b/validators/validator.py index a2bbc0a5..d6e28388 100644 --- a/validators/validator.py +++ b/validators/validator.py @@ -10,6 +10,7 @@ import cortext from cortext import utils from validators.weight_setter import WeightSetter +from validators.services.cache import QueryResponseCache # Load environment variables from .env file load_dotenv() @@ -126,7 +127,8 @@ def main(): init_wandb(config) loop = asyncio.get_event_loop() - weight_setter = WeightSetter(config=config) + conn = QueryResponseCache() + weight_setter = WeightSetter(config=config, conn=conn) state_path = os.path.join(config.full_path, "state.json") utils.get_state(state_path) try: @@ -139,6 +141,8 @@ def main(): bt.logging.info("updating status before exiting validator") state = utils.get_state(state_path) utils.save_state_to_file(state, state_path) + bt.logging.info("closing connection of cache database.") + conn.close() if config.wandb_on: wandb.finish() diff --git a/validators/weight_setter.py b/validators/weight_setter.py index 54278e66..b8b03cf2 100644 --- a/validators/weight_setter.py +++ b/validators/weight_setter.py @@ -7,7 +7,7 @@ from black.trans import defaultdict from substrateinterface import SubstrateInterface from functools import partial -from typing import Tuple +from typing import Tuple, List import bittensor as bt import cortext @@ -17,15 +17,22 @@ from cortext.protocol import IsAlive, StreamPrompting, ImageResponse, Embeddings from cortext.metaclasses import ValidatorRegistryMeta from validators.services import CapacityService, BaseValidator +from validators.services.cache import QueryResponseCache from validators.utils import handle_response scoring_organic_timeout = 60 class WeightSetter: - def __init__(self, config): + def __init__(self, config, cache: QueryResponseCache): + + # Cache object using sqlite3. 
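For context on the cache object wired in above: it is a small SQLite-backed store keyed by a hash of question, provider, and model, written as validators generate reference answers and read back per provider and model when replaying cached questions. A minimal standalone sketch of the intended round-trip, using the corrected schema from patch 13 of this series and an in-memory database instead of cache.db:

```python
import hashlib
import sqlite3
import time

# Standalone sketch of the cache round-trip; schema follows the corrected
# version later in this series, with ':memory:' standing in for 'cache.db'.
conn = sqlite3.connect(":memory:")
conn.execute("""
    CREATE TABLE IF NOT EXISTS cache (
        p_key TEXT PRIMARY KEY,
        question TEXT,
        answer TEXT,
        provider TEXT,
        model TEXT,
        timestamp REAL
    )
""")

def set_cache(question, answer, provider, model, ttl=3600 * 24):
    # Key on a hash of question + provider + model, as in the patch.
    p_key = hashlib.sha256((question + provider + model).encode("utf-8")).hexdigest()
    conn.execute(
        "INSERT OR REPLACE INTO cache VALUES (?, ?, ?, ?, ?, ?)",
        (p_key, question, answer, provider, model, time.time() + ttl),
    )
    conn.commit()

set_cache('{"prompt": "2+2?"}', "4", "OpenAI", "gpt-4o")
rows = conn.execute(
    "SELECT question, answer FROM cache WHERE provider = ? AND model = ?",
    ("OpenAI", "gpt-4o"),
).fetchall()
assert rows == [('{"prompt": "2+2?"}', "4")]
```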
+ self.in_cache_processing = False + self.batch_size = 30 + self.cache = cache + self.uid_to_capacity = {} - self.available_uids = [] + self.available_uid_to_axons = {} bt.logging.info("Initializing WeightSetter") self.config = config self.wallet = config.wallet @@ -101,14 +108,14 @@ async def refresh_metagraph(self): await self.run_sync_in_async(lambda: self.metagraph.sync()) async def initialize_uids_and_capacities(self): - self.available_uids = await self.get_available_uids() - bt.logging.info(f"Available UIDs: {list(self.available_uids.keys())}") - self.uid_to_capacity = await self.get_capacities_for_uids(self.available_uids) + self.available_uid_to_axons = await self.get_available_uids() + bt.logging.info(f"Available UIDs: {list(self.available_uid_to_axons.keys())}") + self.uid_to_capacity = await self.get_capacities_for_uids(self.available_uid_to_axons) bt.logging.info(f"Capacities for miners: {self.uid_to_capacity}") # Initialize total_scores, score_counts, and total_queries_sent - self.total_scores = {uid: 0.0 for uid in self.available_uids.keys()} - self.score_counts = {uid: 0 for uid in self.available_uids.keys()} - self.total_queries_sent = {uid: 0 for uid in self.available_uids.keys()} + self.total_scores = {uid: 0.0 for uid in self.available_uid_to_axons.keys()} + self.score_counts = {uid: 0 for uid in self.available_uid_to_axons.keys()} + self.total_queries_sent = {uid: 0 for uid in self.available_uid_to_axons.keys()} async def update_and_refresh(self, last_update): bt.logging.info(f"Setting weights, last update {last_update} blocks ago") @@ -145,11 +152,11 @@ async def update_and_refresh(self, last_update): self.score_counts[uid] = 0 self.total_queries_sent[uid] = 0 - self.available_uids = new_available_uids + self.available_uid_to_axons = new_available_uids async def perform_synthetic_queries(self): while True: - if not self.available_uids: + if not self.available_uid_to_axons: await self.initialize_uids_and_capacities() current_block = self.get_current_block() @@ -162,7 +169,7 @@ async def perform_synthetic_queries(self): # Decide which UIDs to query, considering total queries sent async with self.lock: # Select UIDs that have not reached TOTAL_QUERIES_PER_UID - uids_to_query = [uid for uid in self.available_uids + uids_to_query = [uid for uid in self.available_uid_to_axons if self.total_queries_sent[uid] < self.TOTAL_QUERIES_PER_UID] if not uids_to_query: @@ -255,7 +262,7 @@ def select_validator(self) -> BaseValidator: else: return image_validator - def get_validators(self): + def get_validators(self) -> List[BaseValidator]: validators = [] all_classes = ValidatorRegistryMeta.all_classes() for class_name, class_ref in all_classes.items(): @@ -526,7 +533,12 @@ async def process_queries_from_database(self): text_score_task = validator.score_responses(validator_to_query_resps[vali_type], self.uid_to_capacity) score_tasks.append(text_score_task) - resps = await asyncio.gather(*score_tasks) + if self.in_cache_processing: + async with self.lock: + resps = await asyncio.gather(*score_tasks) + else: + resps = await asyncio.gather(*score_tasks) + resps = [item for item in resps if item is not None] # Update total_scores and score_counts async with self.lock: @@ -538,3 +550,52 @@ async def process_queries_from_database(self): # Stop scoring if MIN_SCORED_QUERIES reached if self.score_counts[uid] >= self.MIN_SCORED_QUERIES: bt.logging.info(f"UID {uid} has reached the minimum number of scored queries.") + + @property + def batch_list_of_all_uids(self): + batch_size = self.batch_size 
+ batched_list = [] + for i in range(0, len(self.available_uid_to_axons), batch_size): + batched_list.append(self.available_uid_to_axons.keys()[i:i + batch_size]) + return batched_list + + async def process_queries_from_cache_database(self): + for vali in self.get_validators(): + for provider, model in vali.get_provider_to_models(): + questions_answers: List[Tuple[str, str]] = self.cache.get_all_question_to_answers(provider, model) + # select one of questions_answers + query, answer = random.choice(questions_answers) + query_syn = vali.get_synapse_from_json(query) + + async def score_miners_based_cached_answer(self, vali, query, answer): + bt.logging.info("Starting cache based scoring process...") + total_query_resps = [] + def mock_create_query(): + return query + for batch_uids in self.batch_list_of_all_uids: + async with self.lock: + origin_ref = vali.create_query + vali.create_query = mock_create_query + query_responses = await self.perform_queries(vali, batch_uids) + vali.create_query = origin_ref + total_query_resps += query_responses + + bt.logging.debug(f"total cached query_resps: {total_query_resps}") + + + for uid, response_data in total_query_resps: + # Decide whether to score this query + self.query_database.append({ + 'uid': uid, + 'synapse': response_data['query'], + 'response': response_data['response'], + 'query_type': 'synthetic', + 'timestamp': asyncio.get_event_loop().time(), + 'validator': vali + }) + while self.query_database: + self.in_cache_processing = True + bt.logging.debug("Waiting for completing cache based scoring...") + await asyncio.sleep(10) + bt.logging.info("Successfully complete scoring for all miners with cached data") + return query_responses From de2bb6a4fe6119d38550b6b551173a6423c35bd9 Mon Sep 17 00:00:00 2001 From: acer-king Date: Wed, 18 Sep 2024 04:05:35 -0700 Subject: [PATCH 12/80] add mock function for scoring --- .../services/validators/base_validator.py | 4 + .../services/validators/image_validator.py | 5 + .../services/validators/text_validator.py | 6 + validators/weight_setter.py | 105 ++++++++++-------- 4 files changed, 76 insertions(+), 44 deletions(-) diff --git a/validators/services/validators/base_validator.py b/validators/services/validators/base_validator.py index 4d7492b9..9e912849 100644 --- a/validators/services/validators/base_validator.py +++ b/validators/services/validators/base_validator.py @@ -71,6 +71,10 @@ async def get_answer_task(self, uid, synapse, response): async def get_scoring_task(self, uid, answer, response): pass + @staticmethod + def get_synapse_from_json(data): + pass + @error_handler async def score_responses(self, uid_to_query_resps, uid_to_capacity): answering_tasks = [] diff --git a/validators/services/validators/image_validator.py b/validators/services/validators/image_validator.py index 270a9394..8479fda8 100644 --- a/validators/services/validators/image_validator.py +++ b/validators/services/validators/image_validator.py @@ -113,3 +113,8 @@ async def build_wandb_data(self, scores, responses): @classmethod def get_task_type(cls): return ImageResponse.__name__ + + @staticmethod + def get_synapse_from_json(data): + synapse = ImageResponse.parse_raw(data) + return synapse \ No newline at end of file diff --git a/validators/services/validators/text_validator.py b/validators/services/validators/text_validator.py index 02d61843..2ab54b33 100644 --- a/validators/services/validators/text_validator.py +++ b/validators/services/validators/text_validator.py @@ -175,3 +175,9 @@ async def get_scoring_task(self, uid, 
answer, response): @classmethod def get_task_type(cls): return StreamPrompting.__name__ + + @staticmethod + def get_synapse_from_json(data): + synapse = StreamPrompting.parse_raw(data) + return synapse + diff --git a/validators/weight_setter.py b/validators/weight_setter.py index b8b03cf2..8a277c62 100644 --- a/validators/weight_setter.py +++ b/validators/weight_setter.py @@ -491,6 +491,42 @@ async def consume_organic_queries(self): self.axon.start() bt.logging.info(f"Running validator on uid: {self.my_uid}") + def get_scoring_tasks_from_query_responses(self, queries_to_process): + + grouped_query_resps = defaultdict(list) + validator_to_query_resps = defaultdict(list) + type_to_validator = {} + + # Process queries outside the lock to prevent blocking + for query_data in queries_to_process: + uid = query_data['uid'] + synapse = query_data['synapse'] + response = query_data['response'] + validator = query_data['validator'] + vali_type = type(validator).__name__ + type_to_validator[vali_type] = validator + + provider = synapse.provider + model = synapse.model + + grouped_key = f"{vali_type}:{uid}:{provider}:{model}" + grouped_query_resps[grouped_key].append( + (uid, {'query': synapse, 'response': response})) + + for key, uid_to_query_resps in grouped_query_resps.items(): + vali_type = str(key).split(":")[0] + if not uid_to_query_resps: + continue + query_resp_to_score_for_uids = random.choices(uid_to_query_resps, k=self.max_score_cnt_per_model) + validator_to_query_resps[vali_type] += query_resp_to_score_for_uids + + score_tasks = [] + for vali_type in type_to_validator: + validator = type_to_validator[vali_type] + text_score_task = validator.score_responses(validator_to_query_resps[vali_type], self.uid_to_capacity) + score_tasks.append(text_score_task) + return score_tasks + async def process_queries_from_database(self): while True: await asyncio.sleep(1) # Adjust the sleep time as needed @@ -501,44 +537,9 @@ async def process_queries_from_database(self): queries_to_process = self.query_database.copy() self.query_database.clear() - grouped_query_resps = defaultdict(list) - validator_to_query_resps = defaultdict(list) - type_to_validator = {} - # Process queries outside the lock to prevent blocking - for query_data in queries_to_process: - uid = query_data['uid'] - synapse = query_data['synapse'] - response = query_data['response'] - validator = query_data['validator'] - vali_type = type(validator).__name__ - type_to_validator[vali_type] = validator - - provider = synapse.provider - model = synapse.model - - grouped_key = f"{vali_type}:{uid}:{provider}:{model}" - grouped_query_resps[grouped_key].append( - (uid, {'query': synapse, 'response': response})) - - for key, uid_to_query_resps in grouped_query_resps.items(): - vali_type = str(key).split(":")[0] - if not uid_to_query_resps: - continue - query_resp_to_score_for_uids = random.choices(uid_to_query_resps, k=self.max_score_cnt_per_model) - validator_to_query_resps[vali_type] += query_resp_to_score_for_uids - - score_tasks = [] - for vali_type in type_to_validator: - validator = type_to_validator[vali_type] - text_score_task = validator.score_responses(validator_to_query_resps[vali_type], self.uid_to_capacity) - score_tasks.append(text_score_task) - - if self.in_cache_processing: - async with self.lock: - resps = await asyncio.gather(*score_tasks) - else: - resps = await asyncio.gather(*score_tasks) + score_tasks = self.get_scoring_tasks_from_query_responses(queries_to_process) + resps = await asyncio.gather(*score_tasks) resps = [item for 
item in resps if item is not None] # Update total_scores and score_counts async with self.lock: @@ -566,12 +567,15 @@ async def process_queries_from_cache_database(self): # select one of questions_answers query, answer = random.choice(questions_answers) query_syn = vali.get_synapse_from_json(query) + await self.score_miners_based_cached_answer(vali, query_syn, answer) async def score_miners_based_cached_answer(self, vali, query, answer): bt.logging.info("Starting cache based scoring process...") total_query_resps = [] + def mock_create_query(): return query + for batch_uids in self.batch_list_of_all_uids: async with self.lock: origin_ref = vali.create_query @@ -582,10 +586,10 @@ def mock_create_query(): bt.logging.debug(f"total cached query_resps: {total_query_resps}") - + queries_to_process = [] for uid, response_data in total_query_resps: # Decide whether to score this query - self.query_database.append({ + queries_to_process.append({ 'uid': uid, 'synapse': response_data['query'], 'response': response_data['response'], @@ -593,9 +597,22 @@ def mock_create_query(): 'timestamp': asyncio.get_event_loop().time(), 'validator': vali }) - while self.query_database: - self.in_cache_processing = True - bt.logging.debug("Waiting for completing cache based scoring...") - await asyncio.sleep(10) - bt.logging.info("Successfully complete scoring for all miners with cached data") + + def mock_answer(): + return answer + + async with self.lock: + origin_ref = vali.get_answer_task + vali.get_answer_task = mock_answer + score_tasks = self.get_scoring_tasks_from_query_responses(queries_to_process) + responses = await asyncio.gather(*score_tasks) + vali.get_answer_task = origin_ref + + responses = [item for item in responses if item is not None] + for uid_scores_dict, _, _ in responses: + for uid, score in uid_scores_dict.items(): + self.total_scores[uid] += score + + bt.logging.info("Successfully complete scoring for all miners with cached data and " + f"total score is {self.total_scores}") return query_responses From 3ecce7d40647a1277ebf7f5ada521554b1ab36ae Mon Sep 17 00:00:00 2001 From: acer-king Date: Wed, 18 Sep 2024 08:08:47 -0700 Subject: [PATCH 13/80] fix some issues and created cache decorator --- validators/services/cache.py | 15 ++++++++----- .../services/validators/base_validator.py | 3 --- .../services/validators/image_validator.py | 5 ++--- .../services/validators/text_validator.py | 12 ++++------ validators/utils.py | 22 +++++++++++++++---- validators/validator.py | 7 +++--- 6 files changed, 36 insertions(+), 28 deletions(-) diff --git a/validators/services/cache.py b/validators/services/cache.py index 9dab56fc..49f53cf6 100644 --- a/validators/services/cache.py +++ b/validators/services/cache.py @@ -4,7 +4,7 @@ class QueryResponseCache: - def __int__(self): + def __init__(self): # Connect to (or create) the SQLite database conn = sqlite3.connect('cache.db') cursor = conn.cursor() @@ -12,7 +12,7 @@ def __int__(self): # Create a table for caching (key, value, and expiry time) cursor.execute(''' CREATE TABLE IF NOT EXISTS cache ( - p_key TEXT PRIMARY KEY + p_key TEXT PRIMARY KEY, question TEXT, answer TEXT, provider TEXT, @@ -35,7 +35,7 @@ def set_cache(self, question, answer, provider, model, ttl=3600*24): expires_at = time.time() + ttl cursor = self.conn.cursor() cursor.execute(''' - INSERT OR REPLACE INTO cache (p_key, question, answer, provider, model, expires_at) + INSERT OR REPLACE INTO cache (p_key, question, answer, provider, model, timestamp) VALUES (?, ?, ?, ?, ?, ?) 
''', (p_key, question, answer, provider, model, expires_at)) self.conn.commit() @@ -43,7 +43,7 @@ def set_cache(self, question, answer, provider, model, ttl=3600*24): def get_cache(self, key): cursor = self.conn.cursor() cursor.execute(''' - SELECT value FROM cache WHERE p_key = ? AND expires_at > ? + SELECT value FROM cache WHERE p_key = ? AND timestamp > ? ''', (key, time.time())) result = cursor.fetchone() return result[0] if result else None @@ -51,10 +51,13 @@ def get_cache(self, key): def get_all_question_to_answers(self, provider, model): cursor = self.conn.cursor() cursor.execute(''' - SELECT value FROM cache WHERE provider = ? AND model = ? AND expires_at > ? - ''', (provider, model, time.time())) + SELECT value FROM cache WHERE provider = ? AND model = ? + ''', (provider, model)) result = cursor.fetchall() return list(result) if result else None def close(self): self.conn.close() + + +cache_service = QueryResponseCache() \ No newline at end of file diff --git a/validators/services/validators/base_validator.py b/validators/services/validators/base_validator.py index 9e912849..ae0f2d89 100644 --- a/validators/services/validators/base_validator.py +++ b/validators/services/validators/base_validator.py @@ -60,9 +60,6 @@ async def create_query(self, uid): async def build_wandb_data(self, scores, responses): pass - def should_i_score(self): - return True - @abstractmethod async def get_answer_task(self, uid, synapse, response): pass diff --git a/validators/services/validators/image_validator.py b/validators/services/validators/image_validator.py index 8479fda8..f20942d0 100644 --- a/validators/services/validators/image_validator.py +++ b/validators/services/validators/image_validator.py @@ -1,13 +1,11 @@ import asyncio import random -import traceback import wandb import cortext.reward from cortext.protocol import ImageResponse -from validators.services.validators.base_validator import BaseValidator from validators import utils -from validators.utils import error_handler +from validators.utils import error_handler, save_answer_to_cache from cortext.utils import get_question import bittensor as bt @@ -84,6 +82,7 @@ async def get_scoring_task(self, uid, answer, response: ImageResponse): score = 0 # cortext.reward.deterministic_score(uid, syn, self.weight) return score + @save_answer_to_cache async def get_answer_task(self, uid, synapse: ImageResponse, response): return synapse diff --git a/validators/services/validators/text_validator.py b/validators/services/validators/text_validator.py index 2ab54b33..a743c370 100644 --- a/validators/services/validators/text_validator.py +++ b/validators/services/validators/text_validator.py @@ -11,12 +11,10 @@ from cortext.protocol import StreamPrompting from cortext.utils import (call_anthropic_bedrock, call_bedrock, call_anthropic, call_gemini, call_groq, call_openai, get_question) -from validators.utils import get_should_i_score_arr_for_text +from validators.utils import save_answer_to_cache class TextValidator(BaseValidator): - gen_should_i_score = get_should_i_score_arr_for_text() - def __init__(self, config, provider: str = None, model: str = None, metagraph=None): super().__init__(config, metagraph) self.streaming = True @@ -108,10 +106,6 @@ def get_provider_to_models(self): provider_models.append((provider, model_)) return provider_models - @classmethod - def should_i_score(cls): - return next(cls.gen_should_i_score) - @error_handler async def build_wandb_data(self, uid_to_score, responses): for uid, _ in self.uid_to_questions.items(): @@ 
-163,10 +157,12 @@ async def call_api(self, prompt: str, image_url: Optional[str], provider: str) - else: bt.logging.error(f"provider {provider} not found") + @save_answer_to_cache async def get_answer_task(self, uid: int, query_syn: StreamPrompting, response): prompt = query_syn.messages[0].get("content") image_url = query_syn.messages[0].get("image") - return await self.call_api(prompt, image_url, self.provider) + answer = await self.call_api(prompt, image_url, self.provider) + return answer async def get_scoring_task(self, uid, answer, response): response_str, _ = response diff --git a/validators/utils.py b/validators/utils.py index 027bdd81..80555d5f 100644 --- a/validators/utils.py +++ b/validators/utils.py @@ -11,7 +11,8 @@ from functools import wraps import logging -from cortext import ImageResponse +from cortext import ImageResponse, ALL_SYNAPSE_TYPE +from validators.services.cache import cache_service async def download_image(url): @@ -117,9 +118,22 @@ async def wrapper(*args, **kwargs): return wrapper -def get_should_i_score_arr_for_text(): - for i in itertools.count(): - yield (i % 3) == 0 +def save_answer_to_cache(func): + @wraps(func) + async def wrapper(*args, **kwargs): + answer = await func(*args, **kwargs) + query_syn: ALL_SYNAPSE_TYPE = args[2] + provider = query_syn.provider + model = query_syn.model + try: + cache_service.set_cache(question=str(query_syn.json()), answer=str(answer), provider=provider, model=model) + except Exception as err: + bt.logging.error(f"Exception during cache for uid {args[1]}, {err}") + else: + bt.logging.trace(f"saved answer to cache successfully.") + finally: + return answer + return wrapper def get_should_i_score_arr_for_image(): diff --git a/validators/validator.py b/validators/validator.py index d6e28388..3fdf92fe 100644 --- a/validators/validator.py +++ b/validators/validator.py @@ -10,7 +10,7 @@ import cortext from cortext import utils from validators.weight_setter import WeightSetter -from validators.services.cache import QueryResponseCache +from validators.services.cache import cache_service # Load environment variables from .env file load_dotenv() @@ -127,8 +127,7 @@ def main(): init_wandb(config) loop = asyncio.get_event_loop() - conn = QueryResponseCache() - weight_setter = WeightSetter(config=config, conn=conn) + weight_setter = WeightSetter(config=config, cache=cache_service) state_path = os.path.join(config.full_path, "state.json") utils.get_state(state_path) try: @@ -142,7 +141,7 @@ def main(): state = utils.get_state(state_path) utils.save_state_to_file(state, state_path) bt.logging.info("closing connection of cache database.") - conn.close() + cache_service.close() if config.wandb_on: wandb.finish() From d8925e173d71d69dc1fe426dfb72408a5419ade9 Mon Sep 17 00:00:00 2001 From: acer-king Date: Wed, 18 Sep 2024 09:40:35 -0700 Subject: [PATCH 14/80] complete cache system and scoring function for synthetic query --- validators/services/cache.py | 6 ++-- validators/weight_setter.py | 67 +++++++++++++++++++++--------------- 2 files changed, 43 insertions(+), 30 deletions(-) diff --git a/validators/services/cache.py b/validators/services/cache.py index 49f53cf6..25017405 100644 --- a/validators/services/cache.py +++ b/validators/services/cache.py @@ -51,10 +51,10 @@ def get_cache(self, key): def get_all_question_to_answers(self, provider, model): cursor = self.conn.cursor() cursor.execute(''' - SELECT value FROM cache WHERE provider = ? AND model = ? + SELECT question, answer FROM cache WHERE provider = ? AND model = ? 
''', (provider, model)) - result = cursor.fetchall() - return list(result) if result else None + results = [(row[0], row[1]) for row in cursor.fetchall()] + return results def close(self): self.conn.close() diff --git a/validators/weight_setter.py b/validators/weight_setter.py index 8a277c62..e900d9d2 100644 --- a/validators/weight_setter.py +++ b/validators/weight_setter.py @@ -18,7 +18,7 @@ from cortext.metaclasses import ValidatorRegistryMeta from validators.services import CapacityService, BaseValidator from validators.services.cache import QueryResponseCache -from validators.utils import handle_response +from validators.utils import handle_response, error_handler scoring_organic_timeout = 60 @@ -78,6 +78,7 @@ def __init__(self, config, cache: QueryResponseCache): self.loop.create_task(self.consume_organic_queries()) self.loop.create_task(self.perform_synthetic_queries()) self.loop.create_task(self.process_queries_from_database()) + self.loop.create_task(self.process_queries_from_cache_database()) async def run_sync_in_async(self, fn): return await self.loop.run_in_executor(self.thread_executor, fn) @@ -164,6 +165,10 @@ async def perform_synthetic_queries(self): if last_update >= self.tempo * 2 or ( self.get_blocks_til_epoch(current_block) < 10 and last_update >= self.weights_rate_limit): + async with self.lock: + bt.logging.info("start scoring with cache database") + await self.process_queries_from_cache_database() + bt.logging.info("complete scoring with cache database") await self.update_and_refresh(last_update) # Decide which UIDs to query, considering total queries sent @@ -215,7 +220,6 @@ async def perform_queries(self, selected_validator, uids_to_query): response_tasks = [] query_tasks = [] provider_to_models = selected_validator.get_provider_to_models() - uids_to_query_expand = [] for provider, model in provider_to_models: for uid in uids_to_query: @@ -554,38 +558,51 @@ async def process_queries_from_database(self): @property def batch_list_of_all_uids(self): + uids = list(self.available_uid_to_axons.keys()) batch_size = self.batch_size batched_list = [] - for i in range(0, len(self.available_uid_to_axons), batch_size): - batched_list.append(self.available_uid_to_axons.keys()[i:i + batch_size]) + for i in range(0, len(uids), batch_size): + batched_list.append(uids[i:i + batch_size]) return batched_list + @error_handler async def process_queries_from_cache_database(self): + # await self.initialize_uids_and_capacities() + tasks = [] for vali in self.get_validators(): for provider, model in vali.get_provider_to_models(): questions_answers: List[Tuple[str, str]] = self.cache.get_all_question_to_answers(provider, model) + if not questions_answers: + continue # select one of questions_answers query, answer = random.choice(questions_answers) query_syn = vali.get_synapse_from_json(query) - await self.score_miners_based_cached_answer(vali, query_syn, answer) + tasks.append(self.score_miners_based_cached_answer(vali, query_syn, answer)) + await asyncio.gather(*tasks) + bt.logging.info("Successfully complete scoring for all miners with cached data and " + f"total score is {self.total_scores}") async def score_miners_based_cached_answer(self, vali, query, answer): - bt.logging.info("Starting cache based scoring process...") total_query_resps = [] + provider = query.provider + model = query.model - def mock_create_query(): + async def mock_create_query(*args, **kwargs): return query + origin_ref_create_query = vali.create_query + origin_ref_provider_to_models = vali.get_provider_to_models + 
for batch_uids in self.batch_list_of_all_uids:
-            async with self.lock:
-                origin_ref = vali.create_query
-                vali.create_query = mock_create_query
-                query_responses = await self.perform_queries(vali, batch_uids)
-                vali.create_query = origin_ref
+            vali.create_query = mock_create_query
+            vali.get_provider_to_models = lambda: [(provider, model)]
+            query_responses = await self.perform_queries(vali, batch_uids)
             total_query_resps += query_responses
-            bt.logging.debug(f"total cached query_resps: {total_query_resps}")
+        vali.create_query = origin_ref_create_query
+        vali.get_provider_to_models = origin_ref_provider_to_models
+        bt.logging.debug(f"total cached query_resps: {len(total_query_resps)}")
         queries_to_process = []
         for uid, response_data in total_query_resps:
             # Decide whether to score this query
@@ -598,21 +615,17 @@ def mock_create_query():
                 'validator': vali
             })
 
-        def mock_answer():
+        async def mock_answer(*args, **kwargs):
             return answer
 
-        async with self.lock:
-            origin_ref = vali.get_answer_task
-            vali.get_answer_task = mock_answer
-            score_tasks = self.get_scoring_tasks_from_query_responses(queries_to_process)
-            responses = await asyncio.gather(*score_tasks)
-            vali.get_answer_task = origin_ref
+        origin_ref_answer_task = vali.get_answer_task
+        vali.get_answer_task = mock_answer
+        score_tasks = self.get_scoring_tasks_from_query_responses(queries_to_process)
+        responses = await asyncio.gather(*score_tasks)
+        vali.get_answer_task = origin_ref_answer_task
 
-            responses = [item for item in responses if item is not None]
-            for uid_scores_dict, _, _ in responses:
-                for uid, score in uid_scores_dict.items():
-                    self.total_scores[uid] += score
+        responses = [item for item in responses if item is not None]
+        for uid_scores_dict, _, _ in responses:
+            for uid, score in uid_scores_dict.items():
+                self.total_scores[uid] += score
 
-        bt.logging.info("Successfully complete scoring for all miners with cached data and "
-                        f"total score is {self.total_scores}")
-        return query_responses

From 5f4c2c28675708c0e3d5219bf1d1bcd28339cb55 Mon Sep 17 00:00:00 2001
From: acer-king
Date: Wed, 18 Sep 2024 13:18:44 -0700
Subject: [PATCH 15/80] fix validator organic query issue and add cache functionality to it

---
 validators/weight_setter.py | 82 ++++++++++++++++---------------------
 1 file changed, 35 insertions(+), 47 deletions(-)

diff --git a/validators/weight_setter.py b/validators/weight_setter.py
index e900d9d2..ccfd7217 100644
--- a/validators/weight_setter.py
+++ b/validators/weight_setter.py
@@ -3,12 +3,14 @@
 import random
 import torch
 import traceback
+import time
 
 from black.trans import defaultdict
 from substrateinterface import SubstrateInterface
 from functools import partial
 from typing import Tuple, List
 import bittensor as bt
+from bittensor import StreamingSynapse
 
 import cortext
 
@@ -28,11 +30,12 @@ def __init__(self, config, cache: QueryResponseCache):
         # Cache object using sqlite3. 
self.in_cache_processing = False - self.batch_size = 30 + self.batch_size = config.max_miners_cnt self.cache = cache self.uid_to_capacity = {} self.available_uid_to_axons = {} + self.uids_to_query = [] bt.logging.info("Initializing WeightSetter") self.config = config self.wallet = config.wallet @@ -54,7 +57,6 @@ def __init__(self, config, cache: QueryResponseCache): # Initialize scores and counts self.total_scores = {} self.score_counts = {} # Number of times a UID has been scored - self.total_queries_sent = {} # Total queries sent to each UID self.moving_average_scores = None # Set up axon and dendrite @@ -78,7 +80,6 @@ def __init__(self, config, cache: QueryResponseCache): self.loop.create_task(self.consume_organic_queries()) self.loop.create_task(self.perform_synthetic_queries()) self.loop.create_task(self.process_queries_from_database()) - self.loop.create_task(self.process_queries_from_cache_database()) async def run_sync_in_async(self, fn): return await self.loop.run_in_executor(self.thread_executor, fn) @@ -110,13 +111,13 @@ async def refresh_metagraph(self): async def initialize_uids_and_capacities(self): self.available_uid_to_axons = await self.get_available_uids() + self.uids_to_query = list(self.available_uid_to_axons.keys()) bt.logging.info(f"Available UIDs: {list(self.available_uid_to_axons.keys())}") self.uid_to_capacity = await self.get_capacities_for_uids(self.available_uid_to_axons) bt.logging.info(f"Capacities for miners: {self.uid_to_capacity}") - # Initialize total_scores, score_counts, and total_queries_sent + # Initialize total_scores, score_counts. self.total_scores = {uid: 0.0 for uid in self.available_uid_to_axons.keys()} self.score_counts = {uid: 0 for uid in self.available_uid_to_axons.keys()} - self.total_queries_sent = {uid: 0 for uid in self.available_uid_to_axons.keys()} async def update_and_refresh(self, last_update): bt.logging.info(f"Setting weights, last update {last_update} blocks ago") @@ -132,26 +133,23 @@ async def update_and_refresh(self, last_update): bt.logging.info("Refreshing capacities...") self.uid_to_capacity = await self.get_capacities_for_uids(new_available_uids) - # Update total_scores, score_counts, and total_queries_sent + # Update total_scores, score_counts # Remove UIDs that are no longer available for uid in list(self.total_scores.keys()): if uid not in new_available_uids: del self.total_scores[uid] del self.score_counts[uid] - del self.total_queries_sent[uid] # Add new UIDs for uid in new_available_uids: if uid not in self.total_scores: self.total_scores[uid] = 0.0 self.score_counts[uid] = 0 - self.total_queries_sent[uid] = 0 # Reset counts for new epoch for uid in self.total_scores.keys(): self.total_scores[uid] = 0.0 self.score_counts[uid] = 0 - self.total_queries_sent[uid] = 0 self.available_uid_to_axons = new_available_uids @@ -173,26 +171,21 @@ async def perform_synthetic_queries(self): # Decide which UIDs to query, considering total queries sent async with self.lock: - # Select UIDs that have not reached TOTAL_QUERIES_PER_UID - uids_to_query = [uid for uid in self.available_uid_to_axons - if self.total_queries_sent[uid] < self.TOTAL_QUERIES_PER_UID] - - if not uids_to_query: - bt.logging.info("All UIDs have received the maximum number of total queries.") + if not self.uids_to_query: + bt.logging.info("All UIDs has been processed.") await asyncio.sleep(10) continue + else: + # Limit the number of UIDs to query based on configuration + uids_to_query_batch = self.uids_to_query[:self.batch_size] + # remove processing uids + 
self.uids_to_query = self.uids_to_query[self.batch_size:] - # Prioritize UIDs with least total_queries_sent - uids_to_query.sort(key=lambda uid: self.total_queries_sent[uid]) - - # Limit the number of UIDs to query based on configuration - num_uids_to_query = min(self.config.max_miners_cnt, len(uids_to_query)) - uids_to_query = uids_to_query[:num_uids_to_query] for selected_validator in self.get_validators(): # Perform synthetic queries bt.logging.info("start querying to miners") - query_responses = await self.perform_queries(selected_validator, uids_to_query) + query_responses = await self.perform_queries(selected_validator, uids_to_query_batch) # Store queries and responses in the shared database async with self.lock: for uid, response_data in query_responses: @@ -206,8 +199,8 @@ async def perform_synthetic_queries(self): 'validator': selected_validator }) await asyncio.sleep(1) - self.total_queries_sent[uid] += 1 - bt.logging.info(f"Performed synthetic queries for UIDs: {uids_to_query}") + + bt.logging.info(f"Performed synthetic queries for UIDs: {uids_to_query_batch}") # Slow down the validator steps if necessary await asyncio.sleep(1) @@ -387,8 +380,10 @@ async def images(self, synapse: ImageResponse) -> ImageResponse: bt.logging.info(f"Received {synapse}") axon = self.metagraph.axons[synapse.uid] - synapse_response = await self.dendrite(axon, synapse, deserialize=False, + start_time = time.time() + synapse_response:ImageResponse = await self.dendrite(axon, synapse, deserialize=False, timeout=synapse.timeout) + synapse_response.process_time = time.time() - start_time bt.logging.info(f"New synapse = {synapse_response}") # Store the query and response in the shared database @@ -402,8 +397,6 @@ async def images(self, synapse: ImageResponse) -> ImageResponse: 'validator': ValidatorRegistryMeta.get_class('ImageValidator')(config=self.config, metagraph=self.metagraph) }) - # Update total_queries_sent - self.total_queries_sent[synapse.uid] += 1 return synapse_response @@ -426,12 +419,10 @@ async def embeddings(self, synapse: Embeddings) -> Embeddings: 'validator': ValidatorRegistryMeta.get_class('EmbeddingsValidator')(config=self.config, metagraph=self.metagraph) }) - # Update total_queries_sent - self.total_queries_sent[synapse.uid] += 1 return synapse_response - async def prompt(self, synapse: StreamPrompting) -> StreamPrompting: + async def prompt(self, synapse: StreamPrompting) -> StreamingSynapse.BTStreamingResponse: bt.logging.info(f"Received {synapse}") # Return the streaming response @@ -439,6 +430,9 @@ async def _prompt(synapse, send: Send): bt.logging.info(f"Sending {synapse} request to uid: {synapse.uid}") axon = self.metagraph.axons[synapse.uid] + start_time = time.time() + + await self.dendrite.aclose_session() responses = await self.dendrite( axons=[axon], synapse=synapse, @@ -449,16 +443,15 @@ async def _prompt(synapse, send: Send): response_text = '' - for resp in responses: - async for chunk in resp: - if isinstance(chunk, str): - await send({ - "type": "http.response.body", - "body": chunk.encode("utf-8"), - "more_body": True, - }) - bt.logging.info(f"Streamed text: {chunk}") - response_text += chunk + async for chunk in responses[0]: + if isinstance(chunk, str): + await send({ + "type": "http.response.body", + "body": chunk.encode("utf-8"), + "more_body": True, + }) + bt.logging.info(f"Streamed text: {chunk}") + response_text += chunk await send({"type": "http.response.body", "body": b'', "more_body": False}) @@ -467,14 +460,12 @@ async def _prompt(synapse, send: Send): 
self.query_database.append({ 'uid': synapse.uid, 'synapse': synapse, - 'response': response_text, + 'response': (response_text, time.time() - start_time), 'query_type': 'organic', 'timestamp': asyncio.get_event_loop().time(), 'validator': ValidatorRegistryMeta.get_class('TextValidator')(config=self.config, metagraph=self.metagraph) }) - # Update total_queries_sent - self.total_queries_sent[synapse.uid] += 1 token_streamer = partial(_prompt, synapse) return synapse.create_streaming_response(token_streamer) @@ -536,6 +527,7 @@ async def process_queries_from_database(self): await asyncio.sleep(1) # Adjust the sleep time as needed async with self.lock: if not self.query_database: + bt.logging.trace("no data in query_database. so continue...") continue # Copy queries to process and clear the database queries_to_process = self.query_database.copy() @@ -552,10 +544,6 @@ async def process_queries_from_database(self): self.total_scores[uid] += score self.score_counts[uid] += 1 - # Stop scoring if MIN_SCORED_QUERIES reached - if self.score_counts[uid] >= self.MIN_SCORED_QUERIES: - bt.logging.info(f"UID {uid} has reached the minimum number of scored queries.") - @property def batch_list_of_all_uids(self): uids = list(self.available_uid_to_axons.keys()) From 5f3011dee6066259c4f001c69e2a45b9f866a954 Mon Sep 17 00:00:00 2001 From: acer-king Date: Thu, 19 Sep 2024 10:11:10 -0700 Subject: [PATCH 16/80] fix issue of exceeding max_request per min while processing cache database --- cortext/utils.py | 2 +- miner/providers/anthropicbedrock.py | 1 - validators/weight_setter.py | 15 +++++++++++++-- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/cortext/utils.py b/cortext/utils.py index 0ded5311..2e803125 100644 --- a/cortext/utils.py +++ b/cortext/utils.py @@ -458,7 +458,7 @@ def extract_python_list(text: str): async def call_openai(messages, temperature, model, seed=1234, max_tokens=2048, top_p=1): for _ in range(2): bt.logging.debug( - f"Calling Openai. Temperature = {temperature}, Model = {model}, Seed = {seed}, Messages = {messages}" + f"Calling Openai to get answer. Temperature = {temperature}, Model = {model}, Seed = {seed}, Messages = {messages}" ) try: message = messages[0] diff --git a/miner/providers/anthropicbedrock.py b/miner/providers/anthropicbedrock.py index e9e25afc..dabe552b 100644 --- a/miner/providers/anthropicbedrock.py +++ b/miner/providers/anthropicbedrock.py @@ -34,7 +34,6 @@ async def _prompt(self, synapse: StreamPrompting, send: Send): ) except Exception as err: bt.logging.exception(err) - bt.logging.info("errrr acerr") await send({"type": "http.response.body", "body": b'', "more_body": False}) diff --git a/validators/weight_setter.py b/validators/weight_setter.py index ccfd7217..a0fc570f 100644 --- a/validators/weight_setter.py +++ b/validators/weight_setter.py @@ -566,7 +566,19 @@ async def process_queries_from_cache_database(self): query, answer = random.choice(questions_answers) query_syn = vali.get_synapse_from_json(query) tasks.append(self.score_miners_based_cached_answer(vali, query_syn, answer)) - await asyncio.gather(*tasks) + + # process tasks in batch_size to not exceed max request per 2min. 
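+        # at most cortext.MAX_REQUESTS scoring tasks run per batch, and each batch
+        # is followed by a sleep that pads the elapsed time out to the full
+        # cortext.MIN_REQUEST_PERIOD window, so miners never receive more requests
+        # than the limit allows inside one period.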
+ batched_task_list = [] + for i in range(0, len(tasks), cortext.MAX_REQUESTS): + batched_task_list.append(tasks[i:i + cortext.MAX_REQUESTS]) + for batch_tasks in batched_task_list: + start_time = time.time() + await asyncio.gather(*batch_tasks) + passed_time = time.time() - start_time + sleep_time = max(cortext.MIN_REQUEST_PERIOD * 60 - passed_time, 1) + bt.logging.debug(f"wait time {sleep_time} to not exceed max_request {cortext.MAX_REQUESTS} in 2min") + await asyncio.sleep(sleep_time) + bt.logging.info("Successfully complete scoring for all miners with cached data and " f"total score is {self.total_scores}") @@ -616,4 +628,3 @@ async def mock_answer(*args, **kwargs): for uid_scores_dict, _, _ in responses: for uid, score in uid_scores_dict.items(): self.total_scores[uid] += score - From 5d9ab9f8b3f7fbd385fba9cc5cc18c3a06fb588d Mon Sep 17 00:00:00 2001 From: acer-king Date: Fri, 20 Sep 2024 02:05:18 -0700 Subject: [PATCH 17/80] add custom dendrite --- validators/dendrite.py | 71 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 validators/dendrite.py diff --git a/validators/dendrite.py b/validators/dendrite.py new file mode 100644 index 00000000..11258c86 --- /dev/null +++ b/validators/dendrite.py @@ -0,0 +1,71 @@ +from collections import defaultdict + +from typing import Union, AsyncGenerator, Any, List +from pydantic import BaseModel +import bittensor +from bittensor import dendrite, axon +import bittensor as bt +from cortext import ALL_SYNAPSE_TYPE, MIN_REQUEST_PERIOD + + +class Request(BaseModel): + target_axon: Union[bittensor.AxonInfo, bittensor.axon] + synapse: ALL_SYNAPSE_TYPE = bittensor.Synapse(), + timeout: float = 12.0, + deserialize: bool = True + + +class Dendrite(dendrite): + # class variable to store all status of miners. + hotkey_to_uid_capacity = defaultdict(tuple) + requests_queue: List[Request] = [] + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + @classmethod + def push_request_queue(cls, request): + cls.push_request_queue(request) + + + @classmethod + def get_remaining_capacity(cls, target_axon: axon, synapse: ALL_SYNAPSE_TYPE): + hotkey = target_axon.info().hotkey + uid, cap = cls.miners_to_capacity[hotkey] + provider = synapse.provider + model = synapse.model + return uid, cap.get(provider).get(model) + + @classmethod + def decrease_capacity(cls, target_axon: axon, synapse: ALL_SYNAPSE_TYPE): + pass + + async def call_stream( + self, + target_axon: Union[bittensor.AxonInfo, bittensor.axon], + synapse: ALL_SYNAPSE_TYPE = bittensor.Synapse(), # type: ignore + timeout: float = 12.0, + deserialize: bool = True + ) -> AsyncGenerator[Any, Any]: + uid, remain_cap = Dendrite.get_remaining_capacity(target_axon, synapse) + if remain_cap > 0: + # decrease capacity by one as it's used. + + return super().call_stream(target_axon, synapse, timeout, deserialize) + else: + bt.logging.debug(f"remain_cap is {remain_cap} for this uid {uid}. so can't send request.") + raise StopAsyncIteration + + async def call( + self, + target_axon: Union[bittensor.AxonInfo, bittensor.axon], + synapse: ALL_SYNAPSE_TYPE = bittensor.Synapse(), + timeout: float = 12.0, + deserialize: bool = True, + ) -> bittensor.Synapse: + uid, remain_cap = Dendrite.get_remaining_capacity(target_axon, synapse) + if remain_cap > 0: + return await super().call(target_axon, synapse, timeout, deserialize) + else: + bt.logging.debug(f"remain_cap is {remain_cap} for this uid {uid}. 
so can't send request.") + return synapse From cb74d797a2b13780ad23ddad52d374294dc607b8 Mon Sep 17 00:00:00 2001 From: acer-king Date: Fri, 20 Sep 2024 02:54:33 -0700 Subject: [PATCH 18/80] process organic queries first and then make synthetic query --- validators/dendrite.py | 49 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 42 insertions(+), 7 deletions(-) diff --git a/validators/dendrite.py b/validators/dendrite.py index 11258c86..c541d69e 100644 --- a/validators/dendrite.py +++ b/validators/dendrite.py @@ -1,6 +1,9 @@ +import asyncio from collections import defaultdict from typing import Union, AsyncGenerator, Any, List +from enum import Enum + from pydantic import BaseModel import bittensor from bittensor import dendrite, axon @@ -8,30 +11,62 @@ from cortext import ALL_SYNAPSE_TYPE, MIN_REQUEST_PERIOD +class RequestType(str, Enum): # Inherit from str to enforce the value type as string + organic_type = 'organic' + synthetic_type = 'synthetic' + + class Request(BaseModel): target_axon: Union[bittensor.AxonInfo, bittensor.axon] - synapse: ALL_SYNAPSE_TYPE = bittensor.Synapse(), - timeout: float = 12.0, + synapse: ALL_SYNAPSE_TYPE = bittensor.Synapse() + timeout: float = 12.0 deserialize: bool = True + type: RequestType + stream: False class Dendrite(dendrite): # class variable to store all status of miners. hotkey_to_uid_capacity = defaultdict(tuple) - requests_queue: List[Request] = [] + synthetic_requests_queue: List[Request] = [] + organic_requests_queue: List[Request] = [] def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @classmethod - def push_request_queue(cls, request): - cls.push_request_queue(request) + def push_request_queue(cls, request: Request): + if request.type == RequestType.organic_type: + cls.organic_requests_queue.append(request) + if request.type == RequestType.synthetic_type: + cls.synthetic_requests_queue.append(request) + @classmethod + def process_requests(cls): + while True: + if cls.organic_requests_queue: + # distribute organic queries to miners according to bandwidth. 
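+                # organic (user-facing) requests are always drained ahead of the
+                # synthetic scoring queue, so real traffic is not starved by tests.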
+ bt.logging.info("# distribute organic queries to miners according to bandwidth.") + organic_tasks = [] + for request in cls.organic_requests_queue: + uid, cap = cls.get_remaining_capacity(request) + if cap > 0: + if request.stream: + task = super().call_stream(target_axon=request.target_axon, synapse=request.synapse, + timeout=request.timeout, + deserialize=request.deserialize) + else: + task = super().call(target_axon=request.target_axon, synapse=request.synapse, + timeout=request.timeout, + deserialize=request.deserialize) + results = asyncio.gather(*organic_tasks) @classmethod - def get_remaining_capacity(cls, target_axon: axon, synapse: ALL_SYNAPSE_TYPE): + def get_remaining_capacity(cls, request): + target_axon = request.target_axon + synapse = request.synapse hotkey = target_axon.info().hotkey - uid, cap = cls.miners_to_capacity[hotkey] + uid, cap = cls.hotkey_to_uid_capacity[hotkey] provider = synapse.provider model = synapse.model return uid, cap.get(provider).get(model) From 29f363521931ad130d9d226f875e29f09b0b12a1 Mon Sep 17 00:00:00 2001 From: acer-king Date: Mon, 23 Sep 2024 01:47:06 -0700 Subject: [PATCH 19/80] refresh capacity per 7.2min --- validators/dendrite.py | 67 +++++++++++++++++++++------ validators/services/capacity.py | 11 +++++ validators/services/message_broker.py | 0 3 files changed, 64 insertions(+), 14 deletions(-) create mode 100644 validators/services/message_broker.py diff --git a/validators/dendrite.py b/validators/dendrite.py index c541d69e..fd30318b 100644 --- a/validators/dendrite.py +++ b/validators/dendrite.py @@ -23,6 +23,7 @@ class Request(BaseModel): deserialize: bool = True type: RequestType stream: False + request_id: int class Dendrite(dendrite): @@ -31,6 +32,7 @@ class Dendrite(dendrite): synthetic_requests_queue: List[Request] = [] organic_requests_queue: List[Request] = [] + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -42,24 +44,61 @@ def push_request_queue(cls, request: Request): cls.synthetic_requests_queue.append(request) @classmethod - def process_requests(cls): + async def process_requests(cls): while True: + task_to_request_id = {} if cls.organic_requests_queue: # distribute organic queries to miners according to bandwidth. - bt.logging.info("# distribute organic queries to miners according to bandwidth.") - organic_tasks = [] + bt.logging.info("distribute organic queries to miners according to bandwidth.") for request in cls.organic_requests_queue: - uid, cap = cls.get_remaining_capacity(request) - if cap > 0: - if request.stream: - task = super().call_stream(target_axon=request.target_axon, synapse=request.synapse, - timeout=request.timeout, - deserialize=request.deserialize) - else: - task = super().call(target_axon=request.target_axon, synapse=request.synapse, - timeout=request.timeout, - deserialize=request.deserialize) - results = asyncio.gather(*organic_tasks) + task = asyncio.create_task(cls.create_task_from_request(request)) + task_to_request_id[task] = request.request_id + if cls.synthetic_requests_queue: + bt.logging.info("start synthetic query and test bandwidth for all miners.") + for request in cls.synthetic_requests_queue: + task = asyncio.create_task(cls.create_task_from_request(request)) + task_to_request_id[task] = request.request_id + pass + + for completed_task in asyncio.as_completed(task_to_request_id): + result = await completed_task + request_id = task_to_request_id[completed_task] + bt.logging.info(f"request {request_id} is complete. {result}") + # push result to redis. 
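+                # not wired up yet; a sketch of the intended push, assuming a
+                # redis client is attached to this class:
+                #   redis_client.xadd(f"result_stream:{request_id}", {"result": str(result)})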
+ + # process result + await asyncio.sleep(1) + + @classmethod + async def create_task_from_request(cls, request): + uid, cap = cls.get_remaining_capacity(request) + if cap > 0: + if request.stream: + task = super().call_stream(target_axon=request.target_axon, synapse=request.synapse, + timeout=request.timeout, + deserialize=request.deserialize) + else: + task = await super().call(target_axon=request.target_axon, synapse=request.synapse, + timeout=request.timeout, + deserialize=request.deserialize) + cls.decrease_remaining_cap_after_request(request) + return task + bt.logging.info(f"can't process this request because all miners are too busy now") + return None + + @classmethod + def decrease_remaining_cap_after_request(cls, request): + target_axon = request.target_axon + synapse = request.synapse + hotkey = target_axon.info().hotkey + uid, cap = cls.hotkey_to_uid_capacity[hotkey] + provider = synapse.provider + model = synapse.model + cap[provider][model] = cap - 1 + + # update capacity with decreased by 1 for provider, model + cls.hotkey_to_uid_capacity[hotkey] = uid, cap + @classmethod def get_remaining_capacity(cls, request): diff --git a/validators/services/capacity.py b/validators/services/capacity.py index 51a26f42..c5efa4d2 100644 --- a/validators/services/capacity.py +++ b/validators/services/capacity.py @@ -1,6 +1,8 @@ import asyncio +from copy import deepcopy from cortext.protocol import Bandwidth +from cortext import MIN_REQUEST_PERIOD import bittensor as bt @@ -9,6 +11,9 @@ def __init__(self, metagraph, dendrite): self.metagraph = metagraph self.dendrite: bt.dendrite = dendrite self.timeout = 4 + self.uid_to_capacity = {} + self.remain_uid_to_capacity = {} + self.epoch_len = MIN_REQUEST_PERIOD async def query_capacity_to_miners(self, available_uids): capacity_query_tasks = [] @@ -29,4 +34,10 @@ async def query_capacity_to_miners(self, available_uids): bt.logging.error(f"exception happens while querying capacity to miner {uid}, {resp}") else: uid_to_capacity[uid] = resp + self.uid_to_capacity = deepcopy(uid_to_capacity) return uid_to_capacity + + async def refresh_capacity_per_epoch(self): + while True: + self.remain_uid_to_capacity = deepcopy(self.uid_to_capacity) + await asyncio.sleep(self.epoch_len * 60) diff --git a/validators/services/message_broker.py b/validators/services/message_broker.py new file mode 100644 index 00000000..e69de29b From bc40be28b22bfe999fc7589ec3f5ef32f2fff2b6 Mon Sep 17 00:00:00 2001 From: acer-king Date: Mon, 23 Sep 2024 08:55:30 -0700 Subject: [PATCH 20/80] add wokers add woker_mgr --- cortext/__init__.py | 2 +- validators/dendrite.py | 10 +++++--- validators/services/message_broker.py | 0 validators/services/worker_manager.py | 34 +++++++++++++++++++++++++++ validators/services/workers.py | 32 +++++++++++++++++++++++++ 5 files changed, 74 insertions(+), 4 deletions(-) delete mode 100644 validators/services/message_broker.py create mode 100644 validators/services/worker_manager.py create mode 100644 validators/services/workers.py diff --git a/cortext/__init__.py b/cortext/__init__.py index c073a18f..d5d1907a 100644 --- a/cortext/__init__.py +++ b/cortext/__init__.py @@ -52,7 +52,7 @@ IMAGE_BLACKLIST_STAKE = 5000 EMBEDDING_BLACKLIST_STAKE = 5000 ISALIVE_BLACKLIST_STAKE = min(PROMPT_BLACKLIST_STAKE, IMAGE_BLACKLIST_STAKE, EMBEDDING_BLACKLIST_STAKE) -MIN_REQUEST_PERIOD = 2 +MIN_REQUEST_PERIOD = 7.2 MAX_REQUESTS = 20 # must have the test_key whitelisted to avoid a global blacklist testnet_key = ["5EhEZN6soubtKJm8RN7ANx9FGZ2JezxBUFxr45cdsHtDp3Uk"] diff 
--git a/validators/dendrite.py b/validators/dendrite.py index fd30318b..e247aa7c 100644 --- a/validators/dendrite.py +++ b/validators/dendrite.py @@ -31,11 +31,12 @@ class Dendrite(dendrite): hotkey_to_uid_capacity = defaultdict(tuple) synthetic_requests_queue: List[Request] = [] organic_requests_queue: List[Request] = [] - + lock = asyncio.Lock() def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) + @classmethod def push_request_queue(cls, request: Request): if request.type == RequestType.organic_type: @@ -49,8 +50,11 @@ async def process_requests(cls): task_to_request_id = {} if cls.organic_requests_queue: # distribute organic queries to miners according to bandwidth. + async with cls.lock: + cls.organic_requests_copy = cls.organic_requests_queue.copy() + cls.organic_requests_queue.clear() bt.logging.info("distribute organic queries to miners according to bandwidth.") - for request in cls.organic_requests_queue: + for request in cls.organic_requests_copy: task = asyncio.create_task(cls.create_task_from_request(request)) task_to_request_id[task] = request.request_id if cls.synthetic_requests_queue: @@ -66,7 +70,7 @@ async def process_requests(cls): bt.logging.info(f"request {request_id} is complete. {result}") # push result to redis. - # process result + # wait for 1 sec so that queues are filled with requests. await asyncio.sleep(1) @classmethod diff --git a/validators/services/message_broker.py b/validators/services/message_broker.py deleted file mode 100644 index e69de29b..00000000 diff --git a/validators/services/worker_manager.py b/validators/services/worker_manager.py new file mode 100644 index 00000000..8a2015ab --- /dev/null +++ b/validators/services/worker_manager.py @@ -0,0 +1,34 @@ +import redis +import json +import bittensor as bt + + +class TaskMgr: + def __init__(self): + # Initialize Redis client + self.redis_client = redis.StrictRedis(host='redis', port=6379, db=0) + self.available_works = [] + self.workers = self.get_available_workers() + + def assign_task(self, task): + + # Find the worker with the most available resources (simplified logic) + selected_worker = max(self.workers, + key=lambda w: self.workers[w]) # Example: Assign to worker with max remaining bandwidth + if self.workers[selected_worker] <= 0: + bt.logging.debug(f"no available resources to assign this task.") + return None + + bt.logging.debug(f"Assigning task {task} to {selected_worker}") + # decrease remaining capacity after sending request. 
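+        # self.workers maps worker_id -> remaining bandwidth (read from the
+        # "workers" Redis hash); one unit is spent per dispatched task and is
+        # only restored when a worker re-reports its resources.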
+ self.workers[selected_worker] -= 1 + # Push task to the selected worker's task queue + self.redis_client.rpush(f"tasks:{selected_worker}", task) + + def get_available_workers(self): + # Get all workers' resource info + workers = self.redis_client.hgetall("workers") + worker_to_bandwidth = {} + for worker_id, bandwidth in workers.items(): + worker_to_bandwidth[worker_id] = bandwidth + return worker_to_bandwidth diff --git a/validators/services/workers.py b/validators/services/workers.py new file mode 100644 index 00000000..1abdda66 --- /dev/null +++ b/validators/services/workers.py @@ -0,0 +1,32 @@ +import redis +import time +import bittensor as bt + + +class Worker: + # Initialize Redis client + redis_client = redis.StrictRedis(host='localhost', port=6379, db=0) + + def __init__(self, worker_id, bandwidth, config): + self.worker_id = worker_id + self.bandwidth = bandwidth + self.dendrite = bt.dendrite(config.wallet) + self.report_resources() + + def report_resources(self): + # Store worker's resource info in Redis hash + self.redis_client.hset("workers", self.worker_id, self.bandwidth) + + async def pull_task(self): + # Pull task from worker-specific queue + task = self.redis_client.lpop(f"tasks:{self.worker_id}") + if task: + bt.logging.trace(f"Worker {self.worker_id} received task: {task}") + # Execute the task (in this example, just print the task) + try: + result = await self.dendrite.query(axons=[task.axon], synapse=task.synapse) + except Exception as err: + bt.logging.exception(err) + bt.logging.trace(f"Worker {self.worker_id} completed task: {task}") + return result + return False From fa3163acb67d86e8cdbe8f47d8bc8577c63d4769 Mon Sep 17 00:00:00 2001 From: acer-king Date: Mon, 23 Sep 2024 11:32:24 -0700 Subject: [PATCH 21/80] add workers and task manager and add stream with celery --- cortext/protocol.py | 11 ++++-- validators/dendrite.py | 7 +--- validators/services/worker_manager.py | 34 ---------------- validators/services/workers.py | 32 --------------- validators/task_manager.py | 40 +++++++++++++++++++ validators/weight_setter.py | 19 +++++---- validators/workers.py | 56 +++++++++++++++++++++++++++ 7 files changed, 115 insertions(+), 84 deletions(-) delete mode 100644 validators/services/worker_manager.py delete mode 100644 validators/services/workers.py create mode 100644 validators/task_manager.py create mode 100644 validators/workers.py diff --git a/cortext/protocol.py b/cortext/protocol.py index 27fa3b02..26b105a0 100644 --- a/cortext/protocol.py +++ b/cortext/protocol.py @@ -292,6 +292,12 @@ class StreamPrompting(bt.StreamingSynapse): title="streaming", description="whether to stream the output", ) + deserialize: bool = pydantic.Field( + default=True + ) + task_id: int = pydantic.Field( + default=0 + ) async def process_streaming_response(self, response: StreamingResponse) -> AsyncIterator[str]: if self.completion is None: @@ -303,9 +309,6 @@ async def process_streaming_response(self, response: StreamingResponse) -> Async self.completion += token yield tokens - def deserialize(self) -> str: - return self.completion - def extract_response_json(self, response: StreamingResponse) -> dict: headers = { k.decode("utf-8"): v.decode("utf-8") @@ -338,4 +341,4 @@ def extract_info(prefix: str) -> dict[str, str]: "timeout": self.timeout, "streaming": self.streaming, "uid": self.uid, - } \ No newline at end of file + } diff --git a/validators/dendrite.py b/validators/dendrite.py index e247aa7c..7212186a 100644 --- a/validators/dendrite.py +++ b/validators/dendrite.py @@ -141,9 
+141,4 @@ async def call( timeout: float = 12.0, deserialize: bool = True, ) -> bittensor.Synapse: - uid, remain_cap = Dendrite.get_remaining_capacity(target_axon, synapse) - if remain_cap > 0: - return await super().call(target_axon, synapse, timeout, deserialize) - else: - bt.logging.debug(f"remain_cap is {remain_cap} for this uid {uid}. so can't send request.") - return synapse + pass \ No newline at end of file diff --git a/validators/services/worker_manager.py b/validators/services/worker_manager.py deleted file mode 100644 index 8a2015ab..00000000 --- a/validators/services/worker_manager.py +++ /dev/null @@ -1,34 +0,0 @@ -import redis -import json -import bittensor as bt - - -class TaskMgr: - def __init__(self): - # Initialize Redis client - self.redis_client = redis.StrictRedis(host='redis', port=6379, db=0) - self.available_works = [] - self.workers = self.get_available_workers() - - def assign_task(self, task): - - # Find the worker with the most available resources (simplified logic) - selected_worker = max(self.workers, - key=lambda w: self.workers[w]) # Example: Assign to worker with max remaining bandwidth - if self.workers[selected_worker] <= 0: - bt.logging.debug(f"no available resources to assign this task.") - return None - - bt.logging.debug(f"Assigning task {task} to {selected_worker}") - # decrease remaining capacity after sending request. - self.workers[selected_worker] -= 1 - # Push task to the selected worker's task queue - self.redis_client.rpush(f"tasks:{selected_worker}", task) - - def get_available_workers(self): - # Get all workers' resource info - workers = self.redis_client.hgetall("workers") - worker_to_bandwidth = {} - for worker_id, bandwidth in workers.items(): - worker_to_bandwidth[worker_id] = bandwidth - return worker_to_bandwidth diff --git a/validators/services/workers.py b/validators/services/workers.py deleted file mode 100644 index 1abdda66..00000000 --- a/validators/services/workers.py +++ /dev/null @@ -1,32 +0,0 @@ -import redis -import time -import bittensor as bt - - -class Worker: - # Initialize Redis client - redis_client = redis.StrictRedis(host='localhost', port=6379, db=0) - - def __init__(self, worker_id, bandwidth, config): - self.worker_id = worker_id - self.bandwidth = bandwidth - self.dendrite = bt.dendrite(config.wallet) - self.report_resources() - - def report_resources(self): - # Store worker's resource info in Redis hash - self.redis_client.hset("workers", self.worker_id, self.bandwidth) - - async def pull_task(self): - # Pull task from worker-specific queue - task = self.redis_client.lpop(f"tasks:{self.worker_id}") - if task: - bt.logging.trace(f"Worker {self.worker_id} received task: {task}") - # Execute the task (in this example, just print the task) - try: - result = await self.dendrite.query(axons=[task.axon], synapse=task.synapse) - except Exception as err: - bt.logging.exception(err) - bt.logging.trace(f"Worker {self.worker_id} completed task: {task}") - return result - return False diff --git a/validators/task_manager.py b/validators/task_manager.py new file mode 100644 index 00000000..447f768c --- /dev/null +++ b/validators/task_manager.py @@ -0,0 +1,40 @@ +import redis +import bittensor as bt + +from cortext import ALL_SYNAPSE_TYPE +from validators.workers import Worker + + +class TaskMgr: + def __init__(self, uid_to_capacities, config): + # Initialize Redis client + self.redis_client = redis.StrictRedis(host='redis', port=6379, db=0) + self.workers = [] + self.create_workers(uid_to_capacities) + self.config = config + + 
def assign_task(self, task: ALL_SYNAPSE_TYPE): + + # Find the worker with the most available resources (simplified logic) + selected_worker = max(self.workers, + key=lambda w: self.workers[w]) # Example: Assign to worker with max remaining bandwidth + if self.workers[selected_worker] <= 0: + bt.logging.debug(f"no available resources to assign this task.") + return None + + bt.logging.trace(f"Assigning task {task} to {selected_worker}") + # decrease remaining capacity after sending request. + self.workers[selected_worker] -= 1 + # Push task to the selected worker's task queue + self.redis_client.rpush(f"tasks:{selected_worker}", task.json()) + + def create_workers(self, uid_to_capacities): + # create worker for each uid, provider, model + workers = [] + for uid, cap_info in uid_to_capacities.items(): + for provider, model_to_cap in cap_info.items(): + for model, cap in model_to_cap.items(): + worker_id = f"{uid}_{provider}_{model}" + worker = Worker(worker_id, cap, config=self.config) + workers.append(worker) + self.workers = workers diff --git a/validators/weight_setter.py b/validators/weight_setter.py index a0fc570f..152d1825 100644 --- a/validators/weight_setter.py +++ b/validators/weight_setter.py @@ -21,6 +21,7 @@ from validators.services import CapacityService, BaseValidator from validators.services.cache import QueryResponseCache from validators.utils import handle_response, error_handler +from validators.task_manager import TaskMgr scoring_organic_timeout = 60 @@ -75,10 +76,12 @@ def __init__(self, config, cache: QueryResponseCache): self.tempo = self.subtensor.tempo(self.netuid) self.weights_rate_limit = self.get_weights_rate_limit() + asyncio.run(self.initialize_uids_and_capacities()) + self.task_mgr = TaskMgr(uid_to_capacities=self.uid_to_capacity, config=config) # Set up async tasks self.thread_executor = concurrent.futures.ThreadPoolExecutor(thread_name_prefix='asyncio') self.loop.create_task(self.consume_organic_queries()) - self.loop.create_task(self.perform_synthetic_queries()) + # self.loop.create_task(self.perform_synthetic_queries()) self.loop.create_task(self.process_queries_from_database()) async def run_sync_in_async(self, fn): @@ -181,7 +184,6 @@ async def perform_synthetic_queries(self): # remove processing uids self.uids_to_query = self.uids_to_query[self.batch_size:] - for selected_validator in self.get_validators(): # Perform synthetic queries bt.logging.info("start querying to miners") @@ -381,8 +383,8 @@ async def images(self, synapse: ImageResponse) -> ImageResponse: axon = self.metagraph.axons[synapse.uid] start_time = time.time() - synapse_response:ImageResponse = await self.dendrite(axon, synapse, deserialize=False, - timeout=synapse.timeout) + synapse_response: ImageResponse = await self.dendrite(axon, synapse, deserialize=False, + timeout=synapse.timeout) synapse_response.process_time = time.time() - start_time bt.logging.info(f"New synapse = {synapse_response}") @@ -426,13 +428,14 @@ async def prompt(self, synapse: StreamPrompting) -> StreamingSynapse.BTStreaming bt.logging.info(f"Received {synapse}") # Return the streaming response - async def _prompt(synapse, send: Send): + async def _prompt(synapse: StreamPrompting, send: Send): bt.logging.info(f"Sending {synapse} request to uid: {synapse.uid}") - - axon = self.metagraph.axons[synapse.uid] start_time = time.time() - await self.dendrite.aclose_session() + synapse.deserialize = False + synapse.streaming = True + + self.task_mgr.assign_task(synapse) responses = await self.dendrite( axons=[axon], 
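+        # each worker drains its own Redis list, keyed "tasks:<worker_id>" with
+        # worker_id = "<uid>_<provider>_<model>" (see create_workers below).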
synapse=synapse, diff --git a/validators/workers.py b/validators/workers.py new file mode 100644 index 00000000..c6f91fb6 --- /dev/null +++ b/validators/workers.py @@ -0,0 +1,56 @@ +import asyncio + +import redis +import json +import bittensor as bt +from cortext import ALL_SYNAPSE_TYPE, StreamPrompting, ImageResponse + + +class Worker: + # Initialize Redis client + redis_client = redis.StrictRedis(host='localhost', port=6379, db=0) + TASK_STREAM = 'task_stream' + RESULT_STREAM = 'result_stream' + + def __init__(self, worker_id, bandwidth, config, axon): + self.worker_id = worker_id + self.bandwidth = bandwidth + self.dendrite = bt.dendrite(config.wallet) + self.axon = axon + self.report_resources() + + def report_resources(self): + # Store worker's resource info in Redis hash + self.redis_client.hset("workers", self.worker_id, self.bandwidth) + + @staticmethod + def covert_json_to_synapse(task_obj): + if task_obj.get("streaming"): + synapse = StreamPrompting.parse_obj(task_obj) + else: + synapse = ImageResponse.parse_obj(task_obj) + return synapse + + async def pull_and_run_task(self): + # Pull task from worker-specific queue + while True: + task = json.loads(self.redis_client.lpop(f"tasks:{self.worker_id}") or "{}") + if task: + synapse = self.covert_json_to_synapse(task) + bt.logging.trace(f"Worker {self.worker_id} received task: {synapse}") + task_id = synapse.task_id + try: + responses = await self.dendrite( + axons=[self.axon], + synapse=synapse, + deserialize=synapse.deserialize, + timeout=synapse.timeout, + streaming=synapse.streaming, + ) + except Exception as err: + bt.logging.exception(err) + else: + async for chunk in responses[0]: + if isinstance(chunk, str): + await self.redis_client.xadd(Worker.RESULT_STREAM, {'task_id': task_id, 'chunk': chunk}) + await asyncio.sleep(0.1) From 8efd156f0940416485a2178a12a631bed37a18b7 Mon Sep 17 00:00:00 2001 From: acer-king Date: Tue, 24 Sep 2024 03:42:24 -0700 Subject: [PATCH 22/80] create seperate workers for each task --- cortext/__init__.py | 3 +++ validators/services/redis.py | 37 ++++++++++++++++++++++++++++++++++++ validators/task_manager.py | 26 ++++++++++++++++--------- validators/utils.py | 17 ++++++++++++----- validators/workers.py | 24 +++++++++++------------ 5 files changed, 81 insertions(+), 26 deletions(-) create mode 100644 validators/services/redis.py diff --git a/cortext/__init__.py b/cortext/__init__.py index d5d1907a..63e47cff 100644 --- a/cortext/__init__.py +++ b/cortext/__init__.py @@ -3769,3 +3769,6 @@ ALL_SYNAPSE_TYPE = Union[StreamPrompting, Embeddings, ImageResponse, IsAlive] + +REDIS_RESULT_STREAM = 'result_stream' +REDIS_RESULT = 'result' \ No newline at end of file diff --git a/validators/services/redis.py b/validators/services/redis.py new file mode 100644 index 00000000..f111ed25 --- /dev/null +++ b/validators/services/redis.py @@ -0,0 +1,37 @@ +import redis +import bittensor as bt +from cortext import REDIS_RESULT_STREAM + + +class Redis: + redis_client = redis.StrictRedis(host='localhost', port=6379, db=0) + + def __init__(self): + pass + + async def get_stream_result(self, task_id): + last_id = '0' # Start reading from the beginning of the stream + bt.logging.trace(f"Waiting for results of task {task_id}...") + stream_name = REDIS_RESULT_STREAM + f"{task_id}" + + while True: + # Read from the Redis stream + result_entries = Redis.redis_client.xread({stream_name: last_id}, block=5000) + result_entries = result_entries or [] + + for entry in result_entries: + stream_name, results = entry + for result_id, 
data in results: + result_task_id = data[b'task_id'].decode() + result_chunk = data[b'chunk'].decode() + # Only process results for the specific task + if result_task_id == task_id: + yield result_chunk + else: + bt.logging.trace("No new results, waiting...") + break + bt.logging.trace(f"stream exit. delete old messages from queue.") + await self.redis_client.xtrim(stream_name, maxlen=0, approximate=False) + + def get_result(self, task_id): + pass diff --git a/validators/task_manager.py b/validators/task_manager.py index 447f768c..70d8e08c 100644 --- a/validators/task_manager.py +++ b/validators/task_manager.py @@ -1,32 +1,40 @@ +import asyncio + import redis import bittensor as bt from cortext import ALL_SYNAPSE_TYPE +from miner.config import config from validators.workers import Worker +from validators import utils class TaskMgr: def __init__(self, uid_to_capacities, config): # Initialize Redis client self.redis_client = redis.StrictRedis(host='redis', port=6379, db=0) - self.workers = [] + self.resources = [] self.create_workers(uid_to_capacities) self.config = config - def assign_task(self, task: ALL_SYNAPSE_TYPE): + def assign_task(self, synapse: ALL_SYNAPSE_TYPE): # Find the worker with the most available resources (simplified logic) - selected_worker = max(self.workers, - key=lambda w: self.workers[w]) # Example: Assign to worker with max remaining bandwidth - if self.workers[selected_worker] <= 0: + selected_worker = max(self.resources, + key=lambda w: self.resources[w]) # Example: Assign to worker with max remaining bandwidth + if self.resources[selected_worker] <= 0: bt.logging.debug(f"no available resources to assign this task.") return None - bt.logging.trace(f"Assigning task {task} to {selected_worker}") + task_id = utils.create_hash_value((synapse.json())) + bt.logging.trace(f"Assigning task {task_id} to {selected_worker}") + # decrease remaining capacity after sending request. 
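+        # capacity bookkeeping moves from self.workers into self.resources, and a
+        # dedicated Worker is now created per task rather than per capacity slot.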
- self.workers[selected_worker] -= 1 + self.resources[selected_worker] -= 1 # Push task to the selected worker's task queue - self.redis_client.rpush(f"tasks:{selected_worker}", task.json()) + worker = Worker(worker_id="123", config=config, axon=selected_worker.axon) + self.redis_client.rpush(f"tasks:{task_id}", synapse.json()) + asyncio.create_task(worker.pull_and_run_task()) def create_workers(self, uid_to_capacities): # create worker for each uid, provider, model @@ -37,4 +45,4 @@ def create_workers(self, uid_to_capacities): worker_id = f"{uid}_{provider}_{model}" worker = Worker(worker_id, cap, config=self.config) workers.append(worker) - self.workers = workers + self.resources = workers diff --git a/validators/utils.py b/validators/utils.py index 80555d5f..6b2e1bc6 100644 --- a/validators/utils.py +++ b/validators/utils.py @@ -2,7 +2,7 @@ import aiohttp import asyncio import base64 -import itertools +import hashlib import inspect import bittensor as bt @@ -123,7 +123,7 @@ def save_answer_to_cache(func): async def wrapper(*args, **kwargs): answer = await func(*args, **kwargs) query_syn: ALL_SYNAPSE_TYPE = args[2] - provider = query_syn.provider + provider = query_syn.provider model = query_syn.model try: cache_service.set_cache(question=str(query_syn.json()), answer=str(answer), provider=provider, model=model) @@ -133,9 +133,16 @@ async def wrapper(*args, **kwargs): bt.logging.trace(f"saved answer to cache successfully.") finally: return answer + return wrapper -def get_should_i_score_arr_for_image(): - for i in itertools.count(): - yield (i % 1) != 0 +def create_hash_value(input_string): + # Create a SHA-256 hash object + input_string = str(input_string) + hash_object = hashlib.sha256() + # Encode the string to bytes and update the hash object + hash_object.update(input_string.encode('utf-8')) + # Get the hexadecimal representation of the hash + hash_value = hash_object.hexdigest() + return hash_value diff --git a/validators/workers.py b/validators/workers.py index c6f91fb6..0f4b0a30 100644 --- a/validators/workers.py +++ b/validators/workers.py @@ -3,14 +3,12 @@ import redis import json import bittensor as bt -from cortext import ALL_SYNAPSE_TYPE, StreamPrompting, ImageResponse +from cortext import StreamPrompting, ImageResponse, REDIS_RESULT_STREAM, REDIS_RESULT class Worker: # Initialize Redis client redis_client = redis.StrictRedis(host='localhost', port=6379, db=0) - TASK_STREAM = 'task_stream' - RESULT_STREAM = 'result_stream' def __init__(self, worker_id, bandwidth, config, axon): self.worker_id = worker_id @@ -19,9 +17,6 @@ def __init__(self, worker_id, bandwidth, config, axon): self.axon = axon self.report_resources() - def report_resources(self): - # Store worker's resource info in Redis hash - self.redis_client.hset("workers", self.worker_id, self.bandwidth) @staticmethod def covert_json_to_synapse(task_obj): @@ -38,9 +33,8 @@ async def pull_and_run_task(self): if task: synapse = self.covert_json_to_synapse(task) bt.logging.trace(f"Worker {self.worker_id} received task: {synapse}") - task_id = synapse.task_id try: - responses = await self.dendrite( + responses = self.dendrite( axons=[self.axon], synapse=synapse, deserialize=synapse.deserialize, @@ -50,7 +44,13 @@ async def pull_and_run_task(self): except Exception as err: bt.logging.exception(err) else: - async for chunk in responses[0]: - if isinstance(chunk, str): - await self.redis_client.xadd(Worker.RESULT_STREAM, {'task_id': task_id, 'chunk': chunk}) - await asyncio.sleep(0.1) + if synapse.streaming: + async for chunk 
in responses[0]: + if isinstance(chunk, str): + await self.redis_client.xadd(REDIS_RESULT_STREAM, {"chunk": chunk}) + else: + await self.redis_client.rpush(REDIS_RESULT, responses[0]) + else: + # if there is no task then await 1sec. + bt.logging.info(f"no new task to consume") + break From 2c13979d05e3ec8e089b9f51a2c2d0644d79d025 Mon Sep 17 00:00:00 2001 From: acer-king Date: Tue, 24 Sep 2024 04:26:39 -0700 Subject: [PATCH 23/80] add async gen for consuming organic query --- validators/task_manager.py | 42 ++++++++++++++++++------------------- validators/weight_setter.py | 18 +++++++--------- validators/workers.py | 8 +++---- 3 files changed, 31 insertions(+), 37 deletions(-) diff --git a/validators/task_manager.py b/validators/task_manager.py index 70d8e08c..6676b575 100644 --- a/validators/task_manager.py +++ b/validators/task_manager.py @@ -1,48 +1,48 @@ import asyncio - -import redis import bittensor as bt from cortext import ALL_SYNAPSE_TYPE -from miner.config import config from validators.workers import Worker from validators import utils +from validators.services.redis import Redis class TaskMgr: - def __init__(self, uid_to_capacities, config): + def __init__(self, uid_to_capacities, dendrite, metagraph): # Initialize Redis client - self.redis_client = redis.StrictRedis(host='redis', port=6379, db=0) - self.resources = [] - self.create_workers(uid_to_capacities) - self.config = config + self.redis_client = Redis.redis_client + self.resources = {} + self.init_resources(uid_to_capacities) + self.dendrite = dendrite + self.metagraph = metagraph def assign_task(self, synapse: ALL_SYNAPSE_TYPE): # Find the worker with the most available resources (simplified logic) - selected_worker = max(self.resources, - key=lambda w: self.resources[w]) # Example: Assign to worker with max remaining bandwidth - if self.resources[selected_worker] <= 0: + resource_key = max(self.resources, + key=lambda w: self.resources[w]) # Example: Assign to worker with max remaining bandwidth + if self.resources[resource_key] <= 0: bt.logging.debug(f"no available resources to assign this task.") return None task_id = utils.create_hash_value((synapse.json())) - bt.logging.trace(f"Assigning task {task_id} to {selected_worker}") + bt.logging.trace(f"Assigning task {task_id} to {resource_key}") # decrease remaining capacity after sending request. 
- self.resources[selected_worker] -= 1 + self.resources[resource_key] -= 1 # Push task to the selected worker's task queue - worker = Worker(worker_id="123", config=config, axon=selected_worker.axon) + worker = Worker(task_id=task_id, dendrite=self.dendrite, axon=self.get_axon_from_resource_key(resource_key)) self.redis_client.rpush(f"tasks:{task_id}", synapse.json()) asyncio.create_task(worker.pull_and_run_task()) - def create_workers(self, uid_to_capacities): - # create worker for each uid, provider, model - workers = [] + def get_axon_from_resource_key(self, resource_key): + uid = resource_key.split("_")[0] + return self.metagraph.axons[uid] + + def init_resources(self, uid_to_capacities): + # init resources for uid, cap_info in uid_to_capacities.items(): for provider, model_to_cap in cap_info.items(): for model, cap in model_to_cap.items(): - worker_id = f"{uid}_{provider}_{model}" - worker = Worker(worker_id, cap, config=self.config) - workers.append(worker) - self.resources = workers + resource_key = f"{uid}_{provider}_{model}" + self.resources[resource_key] = cap \ No newline at end of file diff --git a/validators/weight_setter.py b/validators/weight_setter.py index 152d1825..0ab9efb7 100644 --- a/validators/weight_setter.py +++ b/validators/weight_setter.py @@ -5,12 +5,14 @@ import traceback import time + from black.trans import defaultdict from substrateinterface import SubstrateInterface from functools import partial from typing import Tuple, List import bittensor as bt from bittensor import StreamingSynapse +import redis import cortext @@ -18,7 +20,7 @@ from cortext.protocol import IsAlive, StreamPrompting, ImageResponse, Embeddings from cortext.metaclasses import ValidatorRegistryMeta -from validators.services import CapacityService, BaseValidator +from validators.services import CapacityService, BaseValidator, redis from validators.services.cache import QueryResponseCache from validators.utils import handle_response, error_handler from validators.task_manager import TaskMgr @@ -33,6 +35,7 @@ def __init__(self, config, cache: QueryResponseCache): self.in_cache_processing = False self.batch_size = config.max_miners_cnt self.cache = cache + self.redis_client = redis.Redis.redis_client self.uid_to_capacity = {} self.available_uid_to_axons = {} @@ -428,25 +431,18 @@ async def prompt(self, synapse: StreamPrompting) -> StreamingSynapse.BTStreaming bt.logging.info(f"Received {synapse}") # Return the streaming response - async def _prompt(synapse: StreamPrompting, send: Send): + async def _prompt(query_synapse: StreamPrompting, send: Send): bt.logging.info(f"Sending {synapse} request to uid: {synapse.uid}") start_time = time.time() synapse.deserialize = False synapse.streaming = True - self.task_mgr.assign_task(synapse) - responses = await self.dendrite( - axons=[axon], - synapse=synapse, - deserialize=False, - timeout=synapse.timeout, - streaming=True, - ) + task_id = self.task_mgr.assign_task(query_synapse) response_text = '' - async for chunk in responses[0]: + async for chunk in self.redis_client.get_stream_result(task_id): if isinstance(chunk, str): await send({ "type": "http.response.body", diff --git a/validators/workers.py b/validators/workers.py index 0f4b0a30..0eacf6ba 100644 --- a/validators/workers.py +++ b/validators/workers.py @@ -10,12 +10,10 @@ class Worker: # Initialize Redis client redis_client = redis.StrictRedis(host='localhost', port=6379, db=0) - def __init__(self, worker_id, bandwidth, config, axon): - self.worker_id = worker_id - self.bandwidth = bandwidth - 
self.dendrite = bt.dendrite(config.wallet) + def __init__(self, task_id, dendrite, axon): + self.worker_id = task_id + self.dendrite = dendrite self.axon = axon - self.report_resources() @staticmethod From 95097943e11eaaf3b05a4646916a696bd047a0ff Mon Sep 17 00:00:00 2001 From: acer-king Date: Tue, 24 Sep 2024 09:02:22 -0700 Subject: [PATCH 24/80] init task_manager and use aioredis --- cortext/protocol.py | 2 +- validators/services/redis.py | 12 +++---- validators/task_manager.py | 14 ++++---- validators/weight_setter.py | 16 +++++---- validators/workers.py | 67 +++++++++++++----------------------- 5 files changed, 46 insertions(+), 65 deletions(-) diff --git a/cortext/protocol.py b/cortext/protocol.py index 26b105a0..f6462b34 100644 --- a/cortext/protocol.py +++ b/cortext/protocol.py @@ -292,7 +292,7 @@ class StreamPrompting(bt.StreamingSynapse): title="streaming", description="whether to stream the output", ) - deserialize: bool = pydantic.Field( + deserialize_flag: bool = pydantic.Field( default=True ) task_id: int = pydantic.Field( diff --git a/validators/services/redis.py b/validators/services/redis.py index f111ed25..b82edcec 100644 --- a/validators/services/redis.py +++ b/validators/services/redis.py @@ -1,22 +1,22 @@ -import redis +import aioredis +import asyncio import bittensor as bt from cortext import REDIS_RESULT_STREAM class Redis: - redis_client = redis.StrictRedis(host='localhost', port=6379, db=0) - def __init__(self): pass - async def get_stream_result(self, task_id): + @staticmethod + async def get_stream_result(redis_client, task_id): last_id = '0' # Start reading from the beginning of the stream bt.logging.trace(f"Waiting for results of task {task_id}...") stream_name = REDIS_RESULT_STREAM + f"{task_id}" while True: # Read from the Redis stream - result_entries = Redis.redis_client.xread({stream_name: last_id}, block=5000) + result_entries = redis_client.xread({stream_name: last_id}, block=5000) result_entries = result_entries or [] for entry in result_entries: @@ -31,7 +31,7 @@ async def get_stream_result(self, task_id): bt.logging.trace("No new results, waiting...") break bt.logging.trace(f"stream exit. delete old messages from queue.") - await self.redis_client.xtrim(stream_name, maxlen=0, approximate=False) + await redis_client.xtrim(stream_name, maxlen=0, approximate=False) def get_result(self, task_id): pass diff --git a/validators/task_manager.py b/validators/task_manager.py index 6676b575..2bbfc939 100644 --- a/validators/task_manager.py +++ b/validators/task_manager.py @@ -4,13 +4,12 @@ from cortext import ALL_SYNAPSE_TYPE from validators.workers import Worker from validators import utils -from validators.services.redis import Redis class TaskMgr: - def __init__(self, uid_to_capacities, dendrite, metagraph): + def __init__(self, uid_to_capacities, dendrite, metagraph, redis_client): # Initialize Redis client - self.redis_client = Redis.redis_client + self.redis_client = redis_client self.resources = {} self.init_resources(uid_to_capacities) self.dendrite = dendrite @@ -26,14 +25,15 @@ def assign_task(self, synapse: ALL_SYNAPSE_TYPE): return None task_id = utils.create_hash_value((synapse.json())) + synapse.task_id = task_id bt.logging.trace(f"Assigning task {task_id} to {resource_key}") # decrease remaining capacity after sending request. 
self.resources[resource_key] -= 1 # Push task to the selected worker's task queue - worker = Worker(task_id=task_id, dendrite=self.dendrite, axon=self.get_axon_from_resource_key(resource_key)) - self.redis_client.rpush(f"tasks:{task_id}", synapse.json()) - asyncio.create_task(worker.pull_and_run_task()) + worker = Worker(synapse=synapse, dendrite=self.dendrite, axon=self.get_axon_from_resource_key(resource_key), + redis_client=self.redis_client) + asyncio.create_task(worker.run_task()) def get_axon_from_resource_key(self, resource_key): uid = resource_key.split("_")[0] @@ -45,4 +45,4 @@ def init_resources(self, uid_to_capacities): for provider, model_to_cap in cap_info.items(): for model, cap in model_to_cap.items(): resource_key = f"{uid}_{provider}_{model}" - self.resources[resource_key] = cap \ No newline at end of file + self.resources[resource_key] = cap diff --git a/validators/weight_setter.py b/validators/weight_setter.py index 0ab9efb7..be26cdc5 100644 --- a/validators/weight_setter.py +++ b/validators/weight_setter.py @@ -4,7 +4,7 @@ import torch import traceback import time - +import aioredis from black.trans import defaultdict from substrateinterface import SubstrateInterface @@ -12,8 +12,6 @@ from typing import Tuple, List import bittensor as bt from bittensor import StreamingSynapse -import redis - import cortext from starlette.types import Send @@ -32,10 +30,11 @@ class WeightSetter: def __init__(self, config, cache: QueryResponseCache): # Cache object using sqlite3. + self.task_mgr = None self.in_cache_processing = False self.batch_size = config.max_miners_cnt self.cache = cache - self.redis_client = redis.Redis.redis_client + self.redis_client = aioredis.from_url("redis://localhost", encoding="utf-8", decode_responses=True) self.uid_to_capacity = {} self.available_uid_to_axons = {} @@ -79,12 +78,12 @@ def __init__(self, config, cache: QueryResponseCache): self.tempo = self.subtensor.tempo(self.netuid) self.weights_rate_limit = self.get_weights_rate_limit() + # initialize uid and capacities. 
asyncio.run(self.initialize_uids_and_capacities()) - self.task_mgr = TaskMgr(uid_to_capacities=self.uid_to_capacity, config=config) # Set up async tasks self.thread_executor = concurrent.futures.ThreadPoolExecutor(thread_name_prefix='asyncio') self.loop.create_task(self.consume_organic_queries()) - # self.loop.create_task(self.perform_synthetic_queries()) + self.loop.create_task(self.perform_synthetic_queries()) self.loop.create_task(self.process_queries_from_database()) async def run_sync_in_async(self, fn): @@ -125,6 +124,9 @@ async def initialize_uids_and_capacities(self): self.total_scores = {uid: 0.0 for uid in self.available_uid_to_axons.keys()} self.score_counts = {uid: 0 for uid in self.available_uid_to_axons.keys()} + self.task_mgr = TaskMgr(uid_to_capacities=self.uid_to_capacity, dendrite=self.dendrite, + metagraph=self.metagraph, redis_client=self.redis_client) + async def update_and_refresh(self, last_update): bt.logging.info(f"Setting weights, last update {last_update} blocks ago") await self.update_weights() @@ -435,7 +437,7 @@ async def _prompt(query_synapse: StreamPrompting, send: Send): bt.logging.info(f"Sending {synapse} request to uid: {synapse.uid}") start_time = time.time() - synapse.deserialize = False + synapse.deserialize_flag = False synapse.streaming = True task_id = self.task_mgr.assign_task(query_synapse) diff --git a/validators/workers.py b/validators/workers.py index 0eacf6ba..960dc9ad 100644 --- a/validators/workers.py +++ b/validators/workers.py @@ -1,54 +1,33 @@ -import asyncio - -import redis -import json import bittensor as bt -from cortext import StreamPrompting, ImageResponse, REDIS_RESULT_STREAM, REDIS_RESULT +from cortext import REDIS_RESULT_STREAM, REDIS_RESULT class Worker: - # Initialize Redis client - redis_client = redis.StrictRedis(host='localhost', port=6379, db=0) - def __init__(self, task_id, dendrite, axon): - self.worker_id = task_id + def __init__(self, synapse, dendrite, axon, redis_client): + self.redis_client = redis_client + self.synapse = synapse self.dendrite = dendrite self.axon = axon - - @staticmethod - def covert_json_to_synapse(task_obj): - if task_obj.get("streaming"): - synapse = StreamPrompting.parse_obj(task_obj) - else: - synapse = ImageResponse.parse_obj(task_obj) - return synapse - - async def pull_and_run_task(self): + async def run_task(self): # Pull task from worker-specific queue - while True: - task = json.loads(self.redis_client.lpop(f"tasks:{self.worker_id}") or "{}") - if task: - synapse = self.covert_json_to_synapse(task) - bt.logging.trace(f"Worker {self.worker_id} received task: {synapse}") - try: - responses = self.dendrite( - axons=[self.axon], - synapse=synapse, - deserialize=synapse.deserialize, - timeout=synapse.timeout, - streaming=synapse.streaming, - ) - except Exception as err: - bt.logging.exception(err) - else: - if synapse.streaming: - async for chunk in responses[0]: - if isinstance(chunk, str): - await self.redis_client.xadd(REDIS_RESULT_STREAM, {"chunk": chunk}) - else: - await self.redis_client.rpush(REDIS_RESULT, responses[0]) + task_id = self.synapse.task_id + bt.logging.trace(f"Worker {task_id} received task: {self.synapse}") + try: + responses = await self.dendrite( + axons=[self.axon], + synapse=self.synapse, + deserialize=self.synapse.deserialize_flag, + timeout=self.synapse.timeout, + streaming=self.synapse.streaming, + ) + except Exception as err: + bt.logging.exception(err) + else: + if self.synapse.streaming: + async for chunk in responses[0]: + if isinstance(chunk, str): + await 
self.redis_client.xadd(REDIS_RESULT_STREAM + f"{task_id}", {"chunk": chunk}) else: - # if there is no task then await 1sec. - bt.logging.info(f"no new task to consume") - break + await self.redis_client.rpush(REDIS_RESULT, responses[0]) From 38d4814851c452e3f15e4a5890ab3c52e4f1510f Mon Sep 17 00:00:00 2001 From: acer-king Date: Tue, 24 Sep 2024 09:03:20 -0700 Subject: [PATCH 25/80] add aioredis to requirements.txt --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 95f7db8a..2adae9f6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,3 +16,4 @@ pyOpenSSL==24.* google-generativeai groq==0.5.0 aioboto3==13.1.1 +aioredis==2.0.1 \ No newline at end of file From ff47de67ccbd1b4fe371fca15e9f87fee8f87cf7 Mon Sep 17 00:00:00 2001 From: acer-king Date: Tue, 24 Sep 2024 10:56:09 -0700 Subject: [PATCH 26/80] fix some issues --- cortext/protocol.py | 10 +++++-- validators/services/capacity.py | 5 ++-- validators/services/redis.py | 37 ------------------------- validators/task_manager.py | 6 ++-- validators/utils.py | 49 +++++++++++++++++++++++++++++---- validators/weight_setter.py | 43 ++++++++--------------------- 6 files changed, 70 insertions(+), 80 deletions(-) delete mode 100644 validators/services/redis.py diff --git a/cortext/protocol.py b/cortext/protocol.py index f6462b34..1b03f3c2 100644 --- a/cortext/protocol.py +++ b/cortext/protocol.py @@ -17,6 +17,7 @@ class IsAlive(bt.Synapse): class Bandwidth(bt.Synapse): bandwidth_rpm: Optional[Dict[str, dict]] = None + class ImageResponse(bt.Synapse): """ A class to represent the response for an image-related request. """ # https://platform.stability.ai/docs/api-reference#tag/v1generation/operation/textToImage @@ -129,6 +130,9 @@ class ImageResponse(bt.Synapse): title="process time", description="processed time of querying dendrite.", ) + task_id: str = pydantic.Field( + default="9999" + ) def deserialize(self) -> Optional[Dict]: """ Deserialize the completion data of the image response. """ @@ -295,8 +299,10 @@ class StreamPrompting(bt.StreamingSynapse): deserialize_flag: bool = pydantic.Field( default=True ) - task_id: int = pydantic.Field( - default=0 + task_id: str = pydantic.Field( + default="9999", + title="task_id", + description="task id of the request from this syanpse." ) async def process_streaming_response(self, response: StreamingResponse) -> AsyncIterator[str]: diff --git a/validators/services/capacity.py b/validators/services/capacity.py index c5efa4d2..2231f406 100644 --- a/validators/services/capacity.py +++ b/validators/services/capacity.py @@ -1,5 +1,6 @@ import asyncio from copy import deepcopy +from typing import List from cortext.protocol import Bandwidth from cortext import MIN_REQUEST_PERIOD @@ -27,13 +28,13 @@ async def query_capacity_to_miners(self, available_uids): capacity_query_tasks.append(task) # Query responses is (uid. 
syn) - query_responses = await asyncio.gather(*capacity_query_tasks, return_exceptions=True) + query_responses: List[Bandwidth] = await asyncio.gather(*capacity_query_tasks, return_exceptions=True) uid_to_capacity = {} for uid, resp in zip(available_uids, query_responses): if isinstance(resp, Exception): bt.logging.error(f"exception happens while querying capacity to miner {uid}, {resp}") else: - uid_to_capacity[uid] = resp + uid_to_capacity[uid] = resp.bandwidth_rpm self.uid_to_capacity = deepcopy(uid_to_capacity) return uid_to_capacity diff --git a/validators/services/redis.py b/validators/services/redis.py deleted file mode 100644 index b82edcec..00000000 --- a/validators/services/redis.py +++ /dev/null @@ -1,37 +0,0 @@ -import aioredis -import asyncio -import bittensor as bt -from cortext import REDIS_RESULT_STREAM - - -class Redis: - def __init__(self): - pass - - @staticmethod - async def get_stream_result(redis_client, task_id): - last_id = '0' # Start reading from the beginning of the stream - bt.logging.trace(f"Waiting for results of task {task_id}...") - stream_name = REDIS_RESULT_STREAM + f"{task_id}" - - while True: - # Read from the Redis stream - result_entries = redis_client.xread({stream_name: last_id}, block=5000) - result_entries = result_entries or [] - - for entry in result_entries: - stream_name, results = entry - for result_id, data in results: - result_task_id = data[b'task_id'].decode() - result_chunk = data[b'chunk'].decode() - # Only process results for the specific task - if result_task_id == task_id: - yield result_chunk - else: - bt.logging.trace("No new results, waiting...") - break - bt.logging.trace(f"stream exit. delete old messages from queue.") - await redis_client.xtrim(stream_name, maxlen=0, approximate=False) - - def get_result(self, task_id): - pass diff --git a/validators/task_manager.py b/validators/task_manager.py index 2bbfc939..fbf81d3c 100644 --- a/validators/task_manager.py +++ b/validators/task_manager.py @@ -2,6 +2,7 @@ import bittensor as bt from cortext import ALL_SYNAPSE_TYPE +from validators.utils import error_handler from validators.workers import Worker from validators import utils @@ -15,6 +16,7 @@ def __init__(self, uid_to_capacities, dendrite, metagraph, redis_client): self.dendrite = dendrite self.metagraph = metagraph + @error_handler def assign_task(self, synapse: ALL_SYNAPSE_TYPE): # Find the worker with the most available resources (simplified logic) @@ -23,7 +25,6 @@ def assign_task(self, synapse: ALL_SYNAPSE_TYPE): if self.resources[resource_key] <= 0: bt.logging.debug(f"no available resources to assign this task.") return None - task_id = utils.create_hash_value((synapse.json())) synapse.task_id = task_id bt.logging.trace(f"Assigning task {task_id} to {resource_key}") @@ -34,9 +35,10 @@ def assign_task(self, synapse: ALL_SYNAPSE_TYPE): worker = Worker(synapse=synapse, dendrite=self.dendrite, axon=self.get_axon_from_resource_key(resource_key), redis_client=self.redis_client) asyncio.create_task(worker.run_task()) + return task_id def get_axon_from_resource_key(self, resource_key): - uid = resource_key.split("_")[0] + uid = int(resource_key.split("_")[0]) return self.metagraph.axons[uid] def init_resources(self, uid_to_capacities): diff --git a/validators/utils.py b/validators/utils.py index 6b2e1bc6..067f6e8a 100644 --- a/validators/utils.py +++ b/validators/utils.py @@ -9,9 +9,10 @@ from PIL import Image from io import BytesIO from functools import wraps -import logging +import traceback -from cortext import ImageResponse, 
ALL_SYNAPSE_TYPE + +from cortext import ImageResponse, ALL_SYNAPSE_TYPE, REDIS_RESULT_STREAM from validators.services.cache import cache_service @@ -36,12 +37,25 @@ async def wrapper(*args, **kwargs): try: result = await func(*args, **kwargs) except Exception as err: - logging.exception(err) + bt.logging.error(f"{err}. {traceback.format_exc()}") return None + else: + return result - return result + @wraps(func) + def wrapper_sync(*args, **kwargs): + try: + result = func(*args, **kwargs) + except Exception as err: + bt.logging.error(f"{err}. {traceback.format_exc()}") + return None + else: + return result - return wrapper + if inspect.iscoroutine(func): + return wrapper + else: + return wrapper_sync async def handle_response_stream(responses) -> tuple[str, str]: @@ -146,3 +160,28 @@ def create_hash_value(input_string): # Get the hexadecimal representation of the hash hash_value = hash_object.hexdigest() return hash_value + + +async def get_stream_result(redis_client, task_id): + last_id = '0' # Start reading from the beginning of the stream + bt.logging.trace(f"Waiting for results of task {task_id}...") + stream_name = REDIS_RESULT_STREAM + f"{task_id}" + + while True: + # Read from the Redis stream + result_entries = redis_client.xread({stream_name: last_id}, block=5000) + result_entries = result_entries or [] + + for entry in result_entries: + stream_name, results = entry + for result_id, data in results: + result_task_id = data[b'task_id'].decode() + result_chunk = data[b'chunk'].decode() + # Only process results for the specific task + if result_task_id == task_id: + yield result_chunk + else: + bt.logging.trace("No new results, waiting...") + break + bt.logging.trace(f"stream exit. delete old messages from queue.") + await redis_client.xtrim(stream_name, maxlen=0, approximate=False) diff --git a/validators/weight_setter.py b/validators/weight_setter.py index be26cdc5..b3dad5bb 100644 --- a/validators/weight_setter.py +++ b/validators/weight_setter.py @@ -18,9 +18,9 @@ from cortext.protocol import IsAlive, StreamPrompting, ImageResponse, Embeddings from cortext.metaclasses import ValidatorRegistryMeta -from validators.services import CapacityService, BaseValidator, redis +from validators.services import CapacityService, BaseValidator from validators.services.cache import QueryResponseCache -from validators.utils import handle_response, error_handler +from validators.utils import handle_response, error_handler, get_stream_result from validators.task_manager import TaskMgr scoring_organic_timeout = 60 @@ -83,8 +83,8 @@ def __init__(self, config, cache: QueryResponseCache): # Set up async tasks self.thread_executor = concurrent.futures.ThreadPoolExecutor(thread_name_prefix='asyncio') self.loop.create_task(self.consume_organic_queries()) - self.loop.create_task(self.perform_synthetic_queries()) - self.loop.create_task(self.process_queries_from_database()) + # self.loop.create_task(self.perform_synthetic_queries()) + # self.loop.create_task(self.process_queries_from_database()) async def run_sync_in_async(self, fn): return await self.loop.run_in_executor(self.thread_executor, fn) @@ -133,33 +133,7 @@ async def update_and_refresh(self, last_update): bt.logging.info("Refreshing metagraph...") await self.refresh_metagraph() - - bt.logging.info("Refreshing available UIDs...") - new_available_uids = await self.get_available_uids() - bt.logging.info(f"Available UIDs: {list(new_available_uids.keys())}") - - bt.logging.info("Refreshing capacities...") - self.uid_to_capacity = await 
self.get_capacities_for_uids(new_available_uids) - - # Update total_scores, score_counts - # Remove UIDs that are no longer available - for uid in list(self.total_scores.keys()): - if uid not in new_available_uids: - del self.total_scores[uid] - del self.score_counts[uid] - - # Add new UIDs - for uid in new_available_uids: - if uid not in self.total_scores: - self.total_scores[uid] = 0.0 - self.score_counts[uid] = 0 - - # Reset counts for new epoch - for uid in self.total_scores.keys(): - self.total_scores[uid] = 0.0 - self.score_counts[uid] = 0 - - self.available_uid_to_axons = new_available_uids + await self.initialize_uids_and_capacities() async def perform_synthetic_queries(self): while True: @@ -441,10 +415,15 @@ async def _prompt(query_synapse: StreamPrompting, send: Send): synapse.streaming = True task_id = self.task_mgr.assign_task(query_synapse) + if task_id is None: + bt.logging.error("Can't create task.") + await send({"type": "http.response.body", "body": b'', "more_body": False}) + return + bt.logging.trace(f"task is created and task_id is {task_id}") response_text = '' - async for chunk in self.redis_client.get_stream_result(task_id): + async for chunk in get_stream_result(task_id=task_id, redis_client=self.redis_client): if isinstance(chunk, str): await send({ "type": "http.response.body", From 56b6fce01238ed984646c09216de3297d58f6be4 Mon Sep 17 00:00:00 2001 From: acer-king Date: Tue, 24 Sep 2024 12:37:03 -0700 Subject: [PATCH 27/80] add more error handler and complete organic query with new load balancer --- validators/task_manager.py | 5 +++++ validators/utils.py | 25 ++++++++++++------------- validators/weight_setter.py | 2 +- validators/workers.py | 1 + 4 files changed, 19 insertions(+), 14 deletions(-) diff --git a/validators/task_manager.py b/validators/task_manager.py index fbf81d3c..d806c62d 100644 --- a/validators/task_manager.py +++ b/validators/task_manager.py @@ -27,6 +27,7 @@ def assign_task(self, synapse: ALL_SYNAPSE_TYPE): return None task_id = utils.create_hash_value((synapse.json())) synapse.task_id = task_id + synapse.uid = self.get_id_from_resource_key(resource_key) bt.logging.trace(f"Assigning task {task_id} to {resource_key}") # decrease remaining capacity after sending request. 
@@ -41,6 +42,10 @@ def get_axon_from_resource_key(self, resource_key): uid = int(resource_key.split("_")[0]) return self.metagraph.axons[uid] + @staticmethod + def get_id_from_resource_key(resource_key): + return int(resource_key.split("_")[0]) + def init_resources(self, uid_to_capacities): # init resources for uid, cap_info in uid_to_capacities.items(): diff --git a/validators/utils.py b/validators/utils.py index 067f6e8a..f479dfa6 100644 --- a/validators/utils.py +++ b/validators/utils.py @@ -11,7 +11,6 @@ from functools import wraps import traceback - from cortext import ImageResponse, ALL_SYNAPSE_TYPE, REDIS_RESULT_STREAM from validators.services.cache import cache_service @@ -162,6 +161,7 @@ def create_hash_value(input_string): return hash_value +@error_handler async def get_stream_result(redis_client, task_id): last_id = '0' # Start reading from the beginning of the stream bt.logging.trace(f"Waiting for results of task {task_id}...") @@ -169,19 +169,18 @@ async def get_stream_result(redis_client, task_id): while True: # Read from the Redis stream - result_entries = redis_client.xread({stream_name: last_id}, block=5000) + result_entries = await redis_client.xread({stream_name: last_id}, block=5000) result_entries = result_entries or [] - - for entry in result_entries: - stream_name, results = entry - for result_id, data in results: - result_task_id = data[b'task_id'].decode() - result_chunk = data[b'chunk'].decode() - # Only process results for the specific task - if result_task_id == task_id: + if result_entries: + for entry in result_entries: + stream_name, results = entry + for result_id, data in results: + result_chunk = data['chunk'] + last_id = result_id + bt.logging.trace(result_chunk) yield result_chunk else: - bt.logging.trace("No new results, waiting...") + bt.logging.trace("No new results. stop generation.") break - bt.logging.trace(f"stream exit. delete old messages from queue.") - await redis_client.xtrim(stream_name, maxlen=0, approximate=False) + bt.logging.trace(f"stream exit. delete old stream from queue.") + await redis_client.delete(stream_name) diff --git a/validators/weight_setter.py b/validators/weight_setter.py index b3dad5bb..e4b9989b 100644 --- a/validators/weight_setter.py +++ b/validators/weight_setter.py @@ -430,8 +430,8 @@ async def _prompt(query_synapse: StreamPrompting, send: Send): "body": chunk.encode("utf-8"), "more_body": True, }) - bt.logging.info(f"Streamed text: {chunk}") response_text += chunk + bt.logging.trace(response_text) await send({"type": "http.response.body", "body": b'', "more_body": False}) diff --git a/validators/workers.py b/validators/workers.py index 960dc9ad..9d5d422b 100644 --- a/validators/workers.py +++ b/validators/workers.py @@ -15,6 +15,7 @@ async def run_task(self): task_id = self.synapse.task_id bt.logging.trace(f"Worker {task_id} received task: {self.synapse}") try: + await self.dendrite.aclose_session() responses = await self.dendrite( axons=[self.axon], synapse=self.synapse, From 22f0ab5d8270d4c3d72cacbbff4eeb38110589ef Mon Sep 17 00:00:00 2001 From: acer-king Date: Wed, 25 Sep 2024 06:12:26 -0700 Subject: [PATCH 28/80] fix issue of duplicated task_id for same synapse. 
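
Two byte-identical synapses used to hash to the same task_id, so their
Redis result streams collided and one consumer could drain the other's
chunks. Salting the hash with a random value (and assigning uid before
hashing, so the payload itself differs per miner) keeps ids unique. A
minimal standalone sketch of the salted hash, using only hashlib and
random from the standard library:

    import hashlib
    import random

    def create_hash_value(input_string: str) -> str:
        # Salt with a random number so identical payloads still map
        # to distinct task ids (and distinct Redis stream names).
        salted = str(input_string) + str(random.Random().random())
        return hashlib.sha256(salted.encode("utf-8")).hexdigest()

    payload = '{"provider": "OpenAI", "model": "gpt-4o"}'
    assert create_hash_value(payload) != create_hash_value(payload)
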
--- validators/task_manager.py | 4 +++- validators/utils.py | 5 +++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/validators/task_manager.py b/validators/task_manager.py index d806c62d..80d295b3 100644 --- a/validators/task_manager.py +++ b/validators/task_manager.py @@ -25,9 +25,11 @@ def assign_task(self, synapse: ALL_SYNAPSE_TYPE): if self.resources[resource_key] <= 0: bt.logging.debug(f"no available resources to assign this task.") return None + + synapse.uid = self.get_id_from_resource_key(resource_key) task_id = utils.create_hash_value((synapse.json())) synapse.task_id = task_id - synapse.uid = self.get_id_from_resource_key(resource_key) + bt.logging.trace(f"Assigning task {task_id} to {resource_key}") # decrease remaining capacity after sending request. diff --git a/validators/utils.py b/validators/utils.py index f479dfa6..46dc138b 100644 --- a/validators/utils.py +++ b/validators/utils.py @@ -1,3 +1,4 @@ +import random import time import aiohttp import asyncio @@ -151,8 +152,8 @@ async def wrapper(*args, **kwargs): def create_hash_value(input_string): - # Create a SHA-256 hash object - input_string = str(input_string) + # Create a SHA-256 hash object based on random and synpase + input_string = str(input_string) + str(random.Random().random()) hash_object = hashlib.sha256() # Encode the string to bytes and update the hash object hash_object.update(input_string.encode('utf-8')) From 59cc2f77c7179eded802ff62ff4a0a19d69b3915 Mon Sep 17 00:00:00 2001 From: acer-king Date: Wed, 25 Sep 2024 07:58:37 -0700 Subject: [PATCH 29/80] add synthetic queries to load balancer. --- validators/task_manager.py | 46 +++++++++----------- validators/utils.py | 16 +++++++ validators/weight_setter.py | 84 ++++++++++++++++--------------------- 3 files changed, 73 insertions(+), 73 deletions(-) diff --git a/validators/task_manager.py b/validators/task_manager.py index 80d295b3..9cd2bb35 100644 --- a/validators/task_manager.py +++ b/validators/task_manager.py @@ -1,4 +1,5 @@ import asyncio +from copy import deepcopy import bittensor as bt from cortext import ALL_SYNAPSE_TYPE @@ -11,47 +12,40 @@ class TaskMgr: def __init__(self, uid_to_capacities, dendrite, metagraph, redis_client): # Initialize Redis client self.redis_client = redis_client - self.resources = {} - self.init_resources(uid_to_capacities) + self.remain_resources = deepcopy(uid_to_capacities) self.dendrite = dendrite self.metagraph = metagraph @error_handler def assign_task(self, synapse: ALL_SYNAPSE_TYPE): - - # Find the worker with the most available resources (simplified logic) - resource_key = max(self.resources, - key=lambda w: self.resources[w]) # Example: Assign to worker with max remaining bandwidth - if self.resources[resource_key] <= 0: - bt.logging.debug(f"no available resources to assign this task.") + # find miner which bandwidth > 0. + uid = self.choose_miner(synapse) # Example: Assign to worker with max remaining bandwidth + if uid is None: + bt.logging.debug(f"no available resources to process this request.") return None - synapse.uid = self.get_id_from_resource_key(resource_key) + synapse.uid = uid task_id = utils.create_hash_value((synapse.json())) synapse.task_id = task_id - bt.logging.trace(f"Assigning task {task_id} to {resource_key}") + bt.logging.trace(f"Assigning task {task_id} to miner {uid}") - # decrease remaining capacity after sending request. 
- self.resources[resource_key] -= 1 # Push task to the selected worker's task queue - worker = Worker(synapse=synapse, dendrite=self.dendrite, axon=self.get_axon_from_resource_key(resource_key), + worker = Worker(synapse=synapse, dendrite=self.dendrite, axon=self.get_axon_from_uid(uid=uid), redis_client=self.redis_client) asyncio.create_task(worker.run_task()) return task_id - def get_axon_from_resource_key(self, resource_key): - uid = int(resource_key.split("_")[0]) + def get_axon_from_uid(self, uid): + uid = int(uid) return self.metagraph.axons[uid] - @staticmethod - def get_id_from_resource_key(resource_key): - return int(resource_key.split("_")[0]) - - def init_resources(self, uid_to_capacities): - # init resources - for uid, cap_info in uid_to_capacities.items(): - for provider, model_to_cap in cap_info.items(): - for model, cap in model_to_cap.items(): - resource_key = f"{uid}_{provider}_{model}" - self.resources[resource_key] = cap + def choose_miner(self, synapse: ALL_SYNAPSE_TYPE): + provider = synapse.provider + model = synapse.model + for uid, capacity in self.remain_resources.items(): + bandwidth = capacity.get(provider).get(model) + if bandwidth is not None and bandwidth > 0: + # decrease resource by one after choosing this miner for the request. + capacity[provider][model] -= 1 + return uid diff --git a/validators/utils.py b/validators/utils.py index 46dc138b..c327be3c 100644 --- a/validators/utils.py +++ b/validators/utils.py @@ -185,3 +185,19 @@ async def get_stream_result(redis_client, task_id): break bt.logging.trace(f"stream exit. delete old stream from queue.") await redis_client.delete(stream_name) + + +def find_positive_values(data: dict): + positive_values = {} + + for key, value in data.items(): + if isinstance(value, dict): + # Recursively handle nested dictionaries + nested_result = find_positive_values(value) + if nested_result: + positive_values[key] = nested_result + elif isinstance(value, (int, float)) and value > 0: + # Store key-value pairs where the value is greater than 0 + positive_values[key] = value + + return positive_values diff --git a/validators/weight_setter.py b/validators/weight_setter.py index e4b9989b..096d11a9 100644 --- a/validators/weight_setter.py +++ b/validators/weight_setter.py @@ -18,7 +18,8 @@ from cortext.protocol import IsAlive, StreamPrompting, ImageResponse, Embeddings from cortext.metaclasses import ValidatorRegistryMeta -from validators.services import CapacityService, BaseValidator +from cortext import MIN_REQUEST_PERIOD +from validators.services import CapacityService, BaseValidator, TextValidator, ImageValidator from validators.services.cache import QueryResponseCache from validators.utils import handle_response, error_handler, get_stream_result from validators.task_manager import TaskMgr @@ -30,11 +31,12 @@ class WeightSetter: def __init__(self, config, cache: QueryResponseCache): # Cache object using sqlite3. 
- self.task_mgr = None + self.task_mgr: TaskMgr = None self.in_cache_processing = False self.batch_size = config.max_miners_cnt self.cache = cache self.redis_client = aioredis.from_url("redis://localhost", encoding="utf-8", decode_responses=True) + self.start_time = time.time() self.uid_to_capacity = {} self.available_uid_to_axons = {} @@ -83,7 +85,7 @@ def __init__(self, config, cache: QueryResponseCache): # Set up async tasks self.thread_executor = concurrent.futures.ThreadPoolExecutor(thread_name_prefix='asyncio') self.loop.create_task(self.consume_organic_queries()) - # self.loop.create_task(self.perform_synthetic_queries()) + self.loop.create_task(self.perform_synthetic_queries()) # self.loop.create_task(self.process_queries_from_database()) async def run_sync_in_async(self, fn): @@ -137,53 +139,41 @@ async def update_and_refresh(self, last_update): async def perform_synthetic_queries(self): while True: - if not self.available_uid_to_axons: - await self.initialize_uids_and_capacities() + time.sleep(MIN_REQUEST_PERIOD * 60) + # check available bandwidth and send synthetic requests to all miners. + query_tasks = [] + for uid, provider_to_cap in self.task_mgr.remain_resources.items(): + for provider, model_to_cap in provider_to_cap.items(): + for model, bandwidth in model_to_cap.items(): + if bandwidth > 0: + # create task and send remaining requests to the miner + vali = self.choose_validator_from_model(model) + query_task = vali.create_query(uid, provider, model) + query_tasks.append(query_task) + else: + continue + async with self.lock: + start_time = time.time() + query_synapses = await asyncio.gather(*query_tasks) + bt.logging.debug(f"{time.time() - start_time} elapsed for getting synthetic queries.") - current_block = self.get_current_block() - last_update = self.get_last_update(current_block) + synthetic_task_ids = [] + for query_syn in query_synapses: + task_id = self.task_mgr.assign_task(query_syn) + synthetic_task_ids.append(task_id) - if last_update >= self.tempo * 2 or ( - self.get_blocks_til_epoch(current_block) < 10 and last_update >= self.weights_rate_limit): - async with self.lock: - bt.logging.info("start scoring with cache database") - await self.process_queries_from_cache_database() - bt.logging.info("complete scoring with cache database") - await self.update_and_refresh(last_update) + # get result from all synthetic tasks + - # Decide which UIDs to query, considering total queries sent - async with self.lock: - if not self.uids_to_query: - bt.logging.info("All UIDs has been processed.") - await asyncio.sleep(10) - continue - else: - # Limit the number of UIDs to query based on configuration - uids_to_query_batch = self.uids_to_query[:self.batch_size] - # remove processing uids - self.uids_to_query = self.uids_to_query[self.batch_size:] - - for selected_validator in self.get_validators(): - # Perform synthetic queries - bt.logging.info("start querying to miners") - query_responses = await self.perform_queries(selected_validator, uids_to_query_batch) - # Store queries and responses in the shared database - async with self.lock: - for uid, response_data in query_responses: - # Decide whether to score this query - self.query_database.append({ - 'uid': uid, - 'synapse': response_data['query'], - 'response': response_data['response'], - 'query_type': 'synthetic', - 'timestamp': asyncio.get_event_loop().time(), - 'validator': selected_validator - }) - await asyncio.sleep(1) - - bt.logging.info(f"Performed synthetic queries for UIDs: {uids_to_query_batch}") - # Slow down the 
validator steps if necessary - await asyncio.sleep(1) + + def choose_validator_from_model(self, model): + text_validator = ValidatorRegistryMeta.get_class('TextValidator')(config=self.config, metagraph=self.metagraph) + image_validator = ValidatorRegistryMeta.get_class('ImageValidator')(config=self.config, + metagraph=self.metagraph) + if model != 'dall-e-3': + return text_validator + else: + return image_validator def should_i_score(self): # Randomly decide whether to score this query based on scoring_percent From 40e299dd772bb902ce53105af1c67cba25b3a7db Mon Sep 17 00:00:00 2001 From: acer-king Date: Wed, 25 Sep 2024 09:16:06 -0700 Subject: [PATCH 30/80] add synthetic process to load balancer --- validators/models/__init__.py | 0 validators/models/enum.py | 6 +++++ validators/utils.py | 29 +++++++++++++++++++++- validators/weight_setter.py | 45 +++++++++++++++++++++++++---------- 4 files changed, 67 insertions(+), 13 deletions(-) create mode 100644 validators/models/__init__.py create mode 100644 validators/models/enum.py diff --git a/validators/models/__init__.py b/validators/models/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/validators/models/enum.py b/validators/models/enum.py new file mode 100644 index 00000000..ae2b1a3d --- /dev/null +++ b/validators/models/enum.py @@ -0,0 +1,6 @@ +from enum import Enum + + +class QueryType(str, Enum): # Inherit from str to enforce the value type as string + organic_type = 'organic' + synthetic_type = 'synthetic' diff --git a/validators/utils.py b/validators/utils.py index c327be3c..907606b8 100644 --- a/validators/utils.py +++ b/validators/utils.py @@ -163,7 +163,7 @@ def create_hash_value(input_string): @error_handler -async def get_stream_result(redis_client, task_id): +async def get_stream_result_as_async_gen(redis_client, task_id): last_id = '0' # Start reading from the beginning of the stream bt.logging.trace(f"Waiting for results of task {task_id}...") stream_name = REDIS_RESULT_STREAM + f"{task_id}" @@ -187,6 +187,33 @@ async def get_stream_result(redis_client, task_id): await redis_client.delete(stream_name) +@error_handler +async def get_stream_result(redis_client, task_id): + last_id = '0' # Start reading from the beginning of the stream + bt.logging.trace(f"Waiting for results of task {task_id}...") + stream_name = REDIS_RESULT_STREAM + f"{task_id}" + full_response = "" + start_time = time.time() + while True: + # Read from the Redis stream + result_entries = await redis_client.xread({stream_name: last_id}, block=5000) + result_entries = result_entries or [] + if result_entries: + for entry in result_entries: + stream_name, results = entry + for result_id, data in results: + result_chunk = data['chunk'] + last_id = result_id + bt.logging.trace(result_chunk) + full_response += result_chunk + else: + bt.logging.trace("No new results. stop generation.") + break + bt.logging.trace(f"stream exit. 
delete old stream from queue.") + await redis_client.delete(stream_name) + return full_response, time.time() - start_time + + def find_positive_values(data: dict): positive_values = {} diff --git a/validators/weight_setter.py b/validators/weight_setter.py index 096d11a9..f727bcea 100644 --- a/validators/weight_setter.py +++ b/validators/weight_setter.py @@ -21,8 +21,9 @@ from cortext import MIN_REQUEST_PERIOD from validators.services import CapacityService, BaseValidator, TextValidator, ImageValidator from validators.services.cache import QueryResponseCache -from validators.utils import handle_response, error_handler, get_stream_result +from validators.utils import handle_response, error_handler, get_stream_result_as_async_gen, get_stream_result from validators.task_manager import TaskMgr +from validators.models.enum import QueryType scoring_organic_timeout = 60 @@ -86,7 +87,7 @@ def __init__(self, config, cache: QueryResponseCache): self.thread_executor = concurrent.futures.ThreadPoolExecutor(thread_name_prefix='asyncio') self.loop.create_task(self.consume_organic_queries()) self.loop.create_task(self.perform_synthetic_queries()) - # self.loop.create_task(self.process_queries_from_database()) + self.loop.create_task(self.process_queries_from_database()) async def run_sync_in_async(self, fn): return await self.loop.run_in_executor(self.thread_executor, fn) @@ -139,7 +140,10 @@ async def update_and_refresh(self, last_update): async def perform_synthetic_queries(self): while True: - time.sleep(MIN_REQUEST_PERIOD * 60) + # wait for MIN_REQUEST_PERIOD minutes. + await asyncio.sleep(MIN_REQUEST_PERIOD * 60) + bt.logging.info(f"start processing synthetic queries {time.time()}") + start_time = time.time() # check available bandwidth and send synthetic requests to all miners. query_tasks = [] for uid, provider_to_cap in self.task_mgr.remain_resources.items(): @@ -152,19 +156,36 @@ async def perform_synthetic_queries(self): query_tasks.append(query_task) else: continue - async with self.lock: - start_time = time.time() - query_synapses = await asyncio.gather(*query_tasks) - bt.logging.debug(f"{time.time() - start_time} elapsed for getting synthetic queries.") + + query_synapses = await asyncio.gather(*query_tasks) synthetic_task_ids = [] - for query_syn in query_synapses: - task_id = self.task_mgr.assign_task(query_syn) - synthetic_task_ids.append(task_id) + async with self.lock: + for query_syn in query_synapses: + task_id = self.task_mgr.assign_task(query_syn) + synthetic_task_ids.append(task_id) + + bt.logging.debug(f"{time.time() - start_time} elapsed for creating and submitting synthetic queries.") # get result from all synthetic tasks - + synthetic_result_tasks = [] + for task_id in synthetic_task_ids: + task = get_stream_result(redis_client=self.redis_client, task_id=task_id) + synthetic_result_tasks.append(task) + + synthetic_results = await asyncio.gather(*synthetic_result_tasks) + for synapse, (result, time_process) in zip(query_synapses, synthetic_results): + self.query_database.append({ + 'uid': synapse.uid, + 'synapse': synapse, + 'response': (result, time_process), + 'query_type': QueryType.synthetic_type, + 'timestamp': asyncio.get_event_loop().time(), + 'validator': self.choose_validator_from_model(synapse.model) + }) + bt.logging.info( + f"synthetic queries has been processed successfully. 
total queries are {len(query_synapses)}") def choose_validator_from_model(self, model): text_validator = ValidatorRegistryMeta.get_class('TextValidator')(config=self.config, metagraph=self.metagraph) @@ -413,7 +434,7 @@ async def _prompt(query_synapse: StreamPrompting, send: Send): response_text = '' - async for chunk in get_stream_result(task_id=task_id, redis_client=self.redis_client): + async for chunk in get_stream_result_as_async_gen(task_id=task_id, redis_client=self.redis_client): if isinstance(chunk, str): await send({ "type": "http.response.body", From df699bad30f796dc80ef4c568ec305a5b63d9804 Mon Sep 17 00:00:00 2001 From: acer-king Date: Wed, 25 Sep 2024 10:40:05 -0700 Subject: [PATCH 31/80] process query_responses and scoring every epoch/10 --- validators/weight_setter.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/validators/weight_setter.py b/validators/weight_setter.py index f727bcea..b91b2d73 100644 --- a/validators/weight_setter.py +++ b/validators/weight_setter.py @@ -32,6 +32,7 @@ class WeightSetter: def __init__(self, config, cache: QueryResponseCache): # Cache object using sqlite3. + self.synthetic_task_done = False self.task_mgr: TaskMgr = None self.in_cache_processing = False self.batch_size = config.max_miners_cnt @@ -157,10 +158,10 @@ async def perform_synthetic_queries(self): else: continue - query_synapses = await asyncio.gather(*query_tasks) - + # don't process any organic query while processing synthetic queries. synthetic_task_ids = [] async with self.lock: + query_synapses = await asyncio.gather(*query_tasks) for query_syn in query_synapses: task_id = self.task_mgr.assign_task(query_syn) synthetic_task_ids.append(task_id) @@ -184,6 +185,8 @@ async def perform_synthetic_queries(self): 'validator': self.choose_validator_from_model(synapse.model) }) + self.synthetic_task_done = True + bt.logging.info( f"synthetic queries has been processed successfully. total queries are {len(query_synapses)}") @@ -516,14 +519,21 @@ def get_scoring_tasks_from_query_responses(self, queries_to_process): async def process_queries_from_database(self): while True: await asyncio.sleep(1) # Adjust the sleep time as needed + + # accumulate all query results for MIN_REQUEST_PERIOD + if not self.query_database or not self.synthetic_task_done: + bt.logging.trace("no data in query_database. so continue...") + continue + async with self.lock: - if not self.query_database: - bt.logging.trace("no data in query_database. so continue...") - continue # Copy queries to process and clear the database queries_to_process = self.query_database.copy() self.query_database.clear() + self.synthetic_task_done = False + + + # with all query_respones, select one per uid, provider, model randomly and score them. 
score_tasks = self.get_scoring_tasks_from_query_responses(queries_to_process) resps = await asyncio.gather(*score_tasks) From f14998ce0659a49e6cbc76c05dc0f101fbd067c6 Mon Sep 17 00:00:00 2001 From: acer-king Date: Wed, 25 Sep 2024 11:48:58 -0700 Subject: [PATCH 32/80] remove miner's limit of request/min and instead use bandwidth of each miner --- miner/services/base.py | 22 --- validators/dendrite.py | 144 ------------------ .../services/validators/base_validator.py | 2 +- validators/weight_setter.py | 13 +- 4 files changed, 7 insertions(+), 174 deletions(-) delete mode 100644 validators/dendrite.py diff --git a/miner/services/base.py b/miner/services/base.py index 6affce99..41b724f9 100644 --- a/miner/services/base.py +++ b/miner/services/base.py @@ -60,28 +60,6 @@ def base_blacklist(self, synapse) -> Tuple[bool, str]: if stake < self.blacklist_amt: return True, f"Blacklisted a low stake {synapse_type} request: {stake} < {self.blacklist_amt} from {hotkey}" - time_window = cortext.MIN_REQUEST_PERIOD * 60 - current_time = time.time() - - if hotkey not in BaseService.request_timestamps: - BaseService.request_timestamps[hotkey] = deque() - - # Remove timestamps outside the current time window - while (BaseService.request_timestamps[hotkey] and - current_time - BaseService.request_timestamps[hotkey][0] > time_window): - BaseService.request_timestamps[hotkey].popleft() - - # Check if the number of requests exceeds the limit - if len(BaseService.request_timestamps[hotkey]) >= cortext.MAX_REQUESTS: - return ( - True, - f"Request frequency for {hotkey} exceeded: " - f"{len(BaseService.request_timestamps[hotkey])} requests in {cortext.MIN_REQUEST_PERIOD} minutes. " - f"Limit is {cortext.MAX_REQUESTS} requests." - ) - - BaseService.request_timestamps[hotkey].append(current_time) - return False, f"accepting {synapse_type} request from {hotkey}" except Exception: diff --git a/validators/dendrite.py b/validators/dendrite.py deleted file mode 100644 index 7212186a..00000000 --- a/validators/dendrite.py +++ /dev/null @@ -1,144 +0,0 @@ -import asyncio -from collections import defaultdict - -from typing import Union, AsyncGenerator, Any, List -from enum import Enum - -from pydantic import BaseModel -import bittensor -from bittensor import dendrite, axon -import bittensor as bt -from cortext import ALL_SYNAPSE_TYPE, MIN_REQUEST_PERIOD - - -class RequestType(str, Enum): # Inherit from str to enforce the value type as string - organic_type = 'organic' - synthetic_type = 'synthetic' - - -class Request(BaseModel): - target_axon: Union[bittensor.AxonInfo, bittensor.axon] - synapse: ALL_SYNAPSE_TYPE = bittensor.Synapse() - timeout: float = 12.0 - deserialize: bool = True - type: RequestType - stream: False - request_id: int - - -class Dendrite(dendrite): - # class variable to store all status of miners. - hotkey_to_uid_capacity = defaultdict(tuple) - synthetic_requests_queue: List[Request] = [] - organic_requests_queue: List[Request] = [] - lock = asyncio.Lock() - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - - @classmethod - def push_request_queue(cls, request: Request): - if request.type == RequestType.organic_type: - cls.organic_requests_queue.append(request) - if request.type == RequestType.synthetic_type: - cls.synthetic_requests_queue.append(request) - - @classmethod - async def process_requests(cls): - while True: - task_to_request_id = {} - if cls.organic_requests_queue: - # distribute organic queries to miners according to bandwidth. 
- async with cls.lock: - cls.organic_requests_copy = cls.organic_requests_queue.copy() - cls.organic_requests_queue.clear() - bt.logging.info("distribute organic queries to miners according to bandwidth.") - for request in cls.organic_requests_copy: - task = asyncio.create_task(cls.create_task_from_request(request)) - task_to_request_id[task] = request.request_id - if cls.synthetic_requests_queue: - bt.logging.info("start synthetic query and test bandwidth for all miners.") - for request in cls.synthetic_requests_queue: - task = asyncio.create_task(cls.create_task_from_request(request)) - task_to_request_id[task] = request.request_id - pass - - for completed_task in asyncio.as_completed(task_to_request_id): - result = await completed_task - request_id = task_to_request_id[completed_task] - bt.logging.info(f"request {request_id} is complete. {result}") - # push result to redis. - - # wait for 1 sec so that queues are filled with requests. - await asyncio.sleep(1) - - @classmethod - async def create_task_from_request(cls, request): - uid, cap = cls.get_remaining_capacity(request) - if cap > 0: - if request.stream: - task = super().call_stream(target_axon=request.target_axon, synapse=request.synapse, - timeout=request.timeout, - deserialize=request.deserialize) - else: - task = await super().call(target_axon=request.target_axon, synapse=request.synapse, - timeout=request.timeout, - deserialize=request.deserialize) - cls.decrease_remaining_cap_after_request(request) - return task - bt.logging.info(f"can't process this request because all miners are too busy now") - return None - - @classmethod - def decrease_remaining_cap_after_request(cls, request): - target_axon = request.target_axon - synapse = request.synapse - hotkey = target_axon.info().hotkey - uid, cap = cls.hotkey_to_uid_capacity[hotkey] - provider = synapse.provider - model = synapse.model - cap[provider][model] = cap - 1 - - # update capacity with decreased by 1 for provider, model - cls.hotkey_to_uid_capacity[hotkey] = uid, cap - - - @classmethod - def get_remaining_capacity(cls, request): - target_axon = request.target_axon - synapse = request.synapse - hotkey = target_axon.info().hotkey - uid, cap = cls.hotkey_to_uid_capacity[hotkey] - provider = synapse.provider - model = synapse.model - return uid, cap.get(provider).get(model) - - @classmethod - def decrease_capacity(cls, target_axon: axon, synapse: ALL_SYNAPSE_TYPE): - pass - - async def call_stream( - self, - target_axon: Union[bittensor.AxonInfo, bittensor.axon], - synapse: ALL_SYNAPSE_TYPE = bittensor.Synapse(), # type: ignore - timeout: float = 12.0, - deserialize: bool = True - ) -> AsyncGenerator[Any, Any]: - uid, remain_cap = Dendrite.get_remaining_capacity(target_axon, synapse) - if remain_cap > 0: - # decrease capacity by one as it's used. - - return super().call_stream(target_axon, synapse, timeout, deserialize) - else: - bt.logging.debug(f"remain_cap is {remain_cap} for this uid {uid}. 
so can't send request.") - raise StopAsyncIteration - - async def call( - self, - target_axon: Union[bittensor.AxonInfo, bittensor.axon], - synapse: ALL_SYNAPSE_TYPE = bittensor.Synapse(), - timeout: float = 12.0, - deserialize: bool = True, - ) -> bittensor.Synapse: - pass \ No newline at end of file diff --git a/validators/services/validators/base_validator.py b/validators/services/validators/base_validator.py index ae0f2d89..1c49c284 100644 --- a/validators/services/validators/base_validator.py +++ b/validators/services/validators/base_validator.py @@ -138,7 +138,7 @@ def get_uid_to_scores_dict(self, uid_to_query_resps, scored_responses: tuple[flo if model_weight is None: bt.logging.debug(f"not weight found for this provider {provider} and model {model}") model_weight = 0 - band_width = uid_to_capacity.get(uid).bandwidth_rpm.get(f"{provider}").get(f"{model}") + band_width = uid_to_capacity.get(uid).get(f"{provider}").get(f"{model}") if band_width is None: bt.logging.debug(f"no band_width found for this uid {uid}") band_width = 1 diff --git a/validators/weight_setter.py b/validators/weight_setter.py index b91b2d73..4676f633 100644 --- a/validators/weight_setter.py +++ b/validators/weight_setter.py @@ -18,7 +18,6 @@ from cortext.protocol import IsAlive, StreamPrompting, ImageResponse, Embeddings from cortext.metaclasses import ValidatorRegistryMeta -from cortext import MIN_REQUEST_PERIOD from validators.services import CapacityService, BaseValidator, TextValidator, ImageValidator from validators.services.cache import QueryResponseCache from validators.utils import handle_response, error_handler, get_stream_result_as_async_gen, get_stream_result @@ -142,7 +141,7 @@ async def update_and_refresh(self, last_update): async def perform_synthetic_queries(self): while True: # wait for MIN_REQUEST_PERIOD minutes. - await asyncio.sleep(MIN_REQUEST_PERIOD * 60) + await asyncio.sleep(cortext.MIN_REQUEST_PERIOD * 60) bt.logging.info(f"start processing synthetic queries {time.time()}") start_time = time.time() # check available bandwidth and send synthetic requests to all miners. @@ -192,12 +191,12 @@ async def perform_synthetic_queries(self): def choose_validator_from_model(self, model): text_validator = ValidatorRegistryMeta.get_class('TextValidator')(config=self.config, metagraph=self.metagraph) - image_validator = ValidatorRegistryMeta.get_class('ImageValidator')(config=self.config, - metagraph=self.metagraph) + # image_validator = ValidatorRegistryMeta.get_class('ImageValidator')(config=self.config, + # metagraph=self.metagraph) if model != 'dall-e-3': return text_validator - else: - return image_validator + # else: + # return image_validator def should_i_score(self): # Randomly decide whether to score this query based on scoring_percent @@ -211,7 +210,7 @@ async def perform_queries(self, selected_validator, uids_to_query): uids_to_query_expand = [] for provider, model in provider_to_models: for uid in uids_to_query: - band_width = self.uid_to_capacity.get(uid).bandwidth_rpm.get(f"{provider}").get(f"{model}") + band_width = self.uid_to_capacity.get(uid).get(f"{provider}").get(f"{model}") for _ in range(band_width): query_task = selected_validator.create_query(uid, provider, model) query_tasks.append(query_task) From 4138eb6e5e29ce4a6a93b692ca39fbb8a9eb2126 Mon Sep 17 00:00:00 2001 From: acer-king Date: Wed, 25 Sep 2024 12:03:08 -0700 Subject: [PATCH 33/80] add update module of weights after scoring queries. 
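
Scores now accumulate in total_scores/score_counts across the whole
window, and update_and_refresh(last_update) runs only after a scoring
pass completes, so weights reflect every scored query since the last
update rather than a single batch. A sketch of the per-UID averaging
that the weight update consumes (assuming a simple mean; the exact
normalization lives in update_weights and is not reproduced here):

    total_scores = {1: 2.4, 2: 0.0, 3: 5.1}   # accumulated this window
    score_counts = {1: 3, 2: 1, 3: 4}         # scored queries per uid

    avg_scores = {uid: total_scores[uid] / max(score_counts[uid], 1)
                  for uid in total_scores}
    print(avg_scores)   # {1: 0.8, 2: 0.0, 3: 1.275}
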
--- validators/weight_setter.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/validators/weight_setter.py b/validators/weight_setter.py index 4676f633..32861834 100644 --- a/validators/weight_setter.py +++ b/validators/weight_setter.py @@ -517,6 +517,8 @@ def get_scoring_tasks_from_query_responses(self, queries_to_process): async def process_queries_from_database(self): while True: + current_block = self.get_current_block() + last_update = self.get_last_update(current_block) await asyncio.sleep(1) # Adjust the sleep time as needed # accumulate all query results for MIN_REQUEST_PERIOD @@ -531,7 +533,6 @@ async def process_queries_from_database(self): self.synthetic_task_done = False - # with all query_respones, select one per uid, provider, model randomly and score them. score_tasks = self.get_scoring_tasks_from_query_responses(queries_to_process) @@ -544,6 +545,8 @@ async def process_queries_from_database(self): self.total_scores[uid] += score self.score_counts[uid] += 1 + await self.update_and_refresh(last_update) + @property def batch_list_of_all_uids(self): uids = list(self.available_uid_to_axons.keys()) From b425481dfea65aab8a1d3c6a9afec4addb9d9f1b Mon Sep 17 00:00:00 2001 From: acer-king Date: Wed, 25 Sep 2024 12:14:03 -0700 Subject: [PATCH 34/80] update task_mgr on initialization of metagraph. --- validators/weight_setter.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/validators/weight_setter.py b/validators/weight_setter.py index 32861834..670d6b9c 100644 --- a/validators/weight_setter.py +++ b/validators/weight_setter.py @@ -137,11 +137,16 @@ async def update_and_refresh(self, last_update): bt.logging.info("Refreshing metagraph...") await self.refresh_metagraph() await self.initialize_uids_and_capacities() + # update task_mgr after synthetic query at the end of iterator. + self.task_mgr = TaskMgr(uid_to_capacities=self.uid_to_capacity, dendrite=self.dendrite, + metagraph=self.metagraph, redis_client=self.redis_client) + bt.logging.info("Metagraph refreshed.") async def perform_synthetic_queries(self): while True: # wait for MIN_REQUEST_PERIOD minutes. - await asyncio.sleep(cortext.MIN_REQUEST_PERIOD * 60) + # await asyncio.sleep(cortext.MIN_REQUEST_PERIOD * 60) + await asyncio.sleep(1) bt.logging.info(f"start processing synthetic queries {time.time()}") start_time = time.time() # check available bandwidth and send synthetic requests to all miners. @@ -157,6 +162,9 @@ async def perform_synthetic_queries(self): else: continue + if not query_tasks: + bt.logging.debug(f"No query tasks for synthetic.") + continue # don't process any organic query while processing synthetic queries. synthetic_task_ids = [] async with self.lock: @@ -185,7 +193,6 @@ async def perform_synthetic_queries(self): }) self.synthetic_task_done = True - bt.logging.info( f"synthetic queries has been processed successfully. total queries are {len(query_synapses)}") From 72b90c0a8d988ba1eb5ee0611805a8f949caa389 Mon Sep 17 00:00:00 2001 From: acer-king Date: Wed, 25 Sep 2024 12:14:54 -0700 Subject: [PATCH 35/80] update task_mgr on initialization of metagraph. 
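
TaskMgr is now (re)built inside initialize_uids_and_capacities, so a
metagraph refresh and a cold start share one code path and the load
balancer's axon references stay in step with the latest metagraph; the
synthetic-query loop also goes back to sleeping MIN_REQUEST_PERIOD
minutes between rounds. A small sketch of the capacity reconciliation
this enables, assuming the nested {uid: {provider: {model: bandwidth}}}
shape used throughout (the `used` figures below are hypothetical,
purely for illustration):

    from copy import deepcopy

    new_cap = {5: {"OpenAI": {"gpt-4o": 12}},
               9: {"Anthropic": {"claude-3": 4}}}
    used = {5: {"OpenAI": {"gpt-4o": 3}}}   # hypothetical in-flight usage

    # Start from the freshly reported capacity, then subtract whatever
    # each miner has already consumed in the current window.
    remain = deepcopy(new_cap)
    for uid, providers in new_cap.items():
        for provider, models in providers.items():
            for model, cap in models.items():
                spent = used.get(uid, {}).get(provider, {}).get(model, 0)
                remain[uid][provider][model] = cap - spent

    print(remain)  # {5: {'OpenAI': {'gpt-4o': 9}}, 9: {'Anthropic': {'claude-3': 4}}}
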
--- validators/weight_setter.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/validators/weight_setter.py b/validators/weight_setter.py index 670d6b9c..a43f4198 100644 --- a/validators/weight_setter.py +++ b/validators/weight_setter.py @@ -145,8 +145,7 @@ async def update_and_refresh(self, last_update): async def perform_synthetic_queries(self): while True: # wait for MIN_REQUEST_PERIOD minutes. - # await asyncio.sleep(cortext.MIN_REQUEST_PERIOD * 60) - await asyncio.sleep(1) + await asyncio.sleep(cortext.MIN_REQUEST_PERIOD * 60) bt.logging.info(f"start processing synthetic queries {time.time()}") start_time = time.time() # check available bandwidth and send synthetic requests to all miners. From afac62cfbe841c472e71c35101d40a0a7d23e789 Mon Sep 17 00:00:00 2001 From: acer-king Date: Thu, 26 Sep 2024 07:09:17 -0700 Subject: [PATCH 36/80] bug fixes with redis --- validators/task_manager.py | 23 ++++++++++++++++++----- validators/utils.py | 11 +++++++++-- validators/validator.py | 1 + validators/weight_setter.py | 17 +++++++++-------- validators/workers.py | 9 +++++---- 5 files changed, 42 insertions(+), 19 deletions(-) diff --git a/validators/task_manager.py b/validators/task_manager.py index 9cd2bb35..4f86b9b4 100644 --- a/validators/task_manager.py +++ b/validators/task_manager.py @@ -9,12 +9,26 @@ class TaskMgr: - def __init__(self, uid_to_capacities, dendrite, metagraph, redis_client): + def __init__(self, uid_to_capacities, dendrite, metagraph, loop): # Initialize Redis client - self.redis_client = redis_client self.remain_resources = deepcopy(uid_to_capacities) + self.uid_to_capacity = deepcopy(uid_to_capacities) self.dendrite = dendrite self.metagraph = metagraph + self.loop = loop + + def restore_capacities_for_all_miners(self): + self.remain_resources = deepcopy(self.uid_to_capacity) + + def update_remain_capacity_based_on_new_capacity(self, new_uid_to_capacity): + for uid, capacity in new_uid_to_capacity.items(): + for provider, model_to_cap in capacity.items(): + for model, cap in model_to_cap.items(): + if self.remain_resources.get(uid).get(provider).get(model) is None: + self.remain_resources[uid][provider][model] = cap + else: + diff = self.uid_to_capacity[uid][provider][model] - cap + self.remain_resources[uid][provider][model] -= diff @error_handler def assign_task(self, synapse: ALL_SYNAPSE_TYPE): @@ -31,9 +45,8 @@ def assign_task(self, synapse: ALL_SYNAPSE_TYPE): bt.logging.trace(f"Assigning task {task_id} to miner {uid}") # Push task to the selected worker's task queue - worker = Worker(synapse=synapse, dendrite=self.dendrite, axon=self.get_axon_from_uid(uid=uid), - redis_client=self.redis_client) - asyncio.create_task(worker.run_task()) + worker = Worker(synapse=synapse, dendrite=self.dendrite, axon=self.get_axon_from_uid(uid=uid)) + self.loop.create_task(worker.run_task()) return task_id def get_axon_from_uid(self, uid): diff --git a/validators/utils.py b/validators/utils.py index 907606b8..cd18d55d 100644 --- a/validators/utils.py +++ b/validators/utils.py @@ -2,6 +2,7 @@ import time import aiohttp import asyncio +import aioredis import base64 import hashlib import inspect @@ -163,10 +164,11 @@ def create_hash_value(input_string): @error_handler -async def get_stream_result_as_async_gen(redis_client, task_id): +async def get_stream_result_as_async_gen(task_id): last_id = '0' # Start reading from the beginning of the stream bt.logging.trace(f"Waiting for results of task {task_id}...") stream_name = REDIS_RESULT_STREAM + f"{task_id}" + redis_client = 
await get_redis_client() while True: # Read from the Redis stream @@ -188,7 +190,8 @@ async def get_stream_result_as_async_gen(redis_client, task_id): @error_handler -async def get_stream_result(redis_client, task_id): +async def get_stream_result(task_id): + redis_client = await get_redis_client() last_id = '0' # Start reading from the beginning of the stream bt.logging.trace(f"Waiting for results of task {task_id}...") stream_name = REDIS_RESULT_STREAM + f"{task_id}" @@ -228,3 +231,7 @@ def find_positive_values(data: dict): positive_values[key] = value return positive_values + +async def get_redis_client(): + redis_client = await aioredis.from_url("redis://localhost", encoding="utf-8", decode_responses=True) + return redis_client \ No newline at end of file diff --git a/validators/validator.py b/validators/validator.py index 3fdf92fe..2754a151 100644 --- a/validators/validator.py +++ b/validators/validator.py @@ -137,6 +137,7 @@ def main(): finally: bt.logging.info("stopping axon server.") weight_setter.axon.stop() + weight_setter.redis_client.close() bt.logging.info("updating status before exiting validator") state = utils.get_state(state_path) utils.save_state_to_file(state, state_path) diff --git a/validators/weight_setter.py b/validators/weight_setter.py index a43f4198..a7601824 100644 --- a/validators/weight_setter.py +++ b/validators/weight_setter.py @@ -36,7 +36,6 @@ def __init__(self, config, cache: QueryResponseCache): self.in_cache_processing = False self.batch_size = config.max_miners_cnt self.cache = cache - self.redis_client = aioredis.from_url("redis://localhost", encoding="utf-8", decode_responses=True) self.start_time = time.time() self.uid_to_capacity = {} @@ -128,7 +127,7 @@ async def initialize_uids_and_capacities(self): self.score_counts = {uid: 0 for uid in self.available_uid_to_axons.keys()} self.task_mgr = TaskMgr(uid_to_capacities=self.uid_to_capacity, dendrite=self.dendrite, - metagraph=self.metagraph, redis_client=self.redis_client) + metagraph=self.metagraph, loop=self.loop) async def update_and_refresh(self, last_update): bt.logging.info(f"Setting weights, last update {last_update} blocks ago") @@ -138,8 +137,7 @@ async def update_and_refresh(self, last_update): await self.refresh_metagraph() await self.initialize_uids_and_capacities() # update task_mgr after synthetic query at the end of iterator. - self.task_mgr = TaskMgr(uid_to_capacities=self.uid_to_capacity, dendrite=self.dendrite, - metagraph=self.metagraph, redis_client=self.redis_client) + self.task_mgr.update_remain_capacity_based_on_new_capacity(self.uid_to_capacity) bt.logging.info("Metagraph refreshed.") async def perform_synthetic_queries(self): @@ -157,7 +155,7 @@ async def perform_synthetic_queries(self): # create task and send remaining requests to the miner vali = self.choose_validator_from_model(model) query_task = vali.create_query(uid, provider, model) - query_tasks.append(query_task) + query_tasks += [query_task] * bandwidth else: continue @@ -172,12 +170,15 @@ async def perform_synthetic_queries(self): task_id = self.task_mgr.assign_task(query_syn) synthetic_task_ids.append(task_id) + # restore capacities immediately after synthetic query consuming all bandwidth. 
+ self.task_mgr.restore_capacities_for_all_miners() + bt.logging.debug(f"{time.time() - start_time} elapsed for creating and submitting synthetic queries.") # get result from all synthetic tasks synthetic_result_tasks = [] for task_id in synthetic_task_ids: - task = get_stream_result(redis_client=self.redis_client, task_id=task_id) + task = get_stream_result(task_id=task_id) synthetic_result_tasks.append(task) synthetic_results = await asyncio.gather(*synthetic_result_tasks) @@ -442,7 +443,7 @@ async def _prompt(query_synapse: StreamPrompting, send: Send): response_text = '' - async for chunk in get_stream_result_as_async_gen(task_id=task_id, redis_client=self.redis_client): + async for chunk in get_stream_result_as_async_gen(task_id=task_id): if isinstance(chunk, str): await send({ "type": "http.response.body", @@ -550,7 +551,7 @@ async def process_queries_from_database(self): for uid, score in uid_scores_dict.items(): self.total_scores[uid] += score self.score_counts[uid] += 1 - + bt.logging.info(f"current total score are {self.total_scores}") await self.update_and_refresh(last_update) @property diff --git a/validators/workers.py b/validators/workers.py index 9d5d422b..067e65bb 100644 --- a/validators/workers.py +++ b/validators/workers.py @@ -1,17 +1,18 @@ import bittensor as bt from cortext import REDIS_RESULT_STREAM, REDIS_RESULT +from validators.utils import get_redis_client class Worker: - def __init__(self, synapse, dendrite, axon, redis_client): - self.redis_client = redis_client + def __init__(self, synapse, dendrite, axon): self.synapse = synapse self.dendrite = dendrite self.axon = axon async def run_task(self): # Pull task from worker-specific queue + redis_client = await get_redis_client() task_id = self.synapse.task_id bt.logging.trace(f"Worker {task_id} received task: {self.synapse}") try: @@ -29,6 +30,6 @@ async def run_task(self): if self.synapse.streaming: async for chunk in responses[0]: if isinstance(chunk, str): - await self.redis_client.xadd(REDIS_RESULT_STREAM + f"{task_id}", {"chunk": chunk}) + await redis_client.xadd(REDIS_RESULT_STREAM + f"{task_id}", {"chunk": chunk}) else: - await self.redis_client.rpush(REDIS_RESULT, responses[0]) + await redis_client.rpush(REDIS_RESULT, responses[0]) From a2697ee3001ee469e6d2d5d2b5484eaff42ce37b Mon Sep 17 00:00:00 2001 From: acer-king Date: Thu, 26 Sep 2024 10:24:21 -0700 Subject: [PATCH 37/80] complete load balancer. --- validators/task_manager.py | 3 +++ validators/utils.py | 4 ++++ validators/validator.py | 3 +-- validators/weight_setter.py | 15 ++++++++------- validators/workers.py | 35 +++++++++++++++++------------------ 5 files changed, 33 insertions(+), 27 deletions(-) diff --git a/validators/task_manager.py b/validators/task_manager.py index 4f86b9b4..31a3cf66 100644 --- a/validators/task_manager.py +++ b/validators/task_manager.py @@ -18,6 +18,7 @@ def __init__(self, uid_to_capacities, dendrite, metagraph, loop): self.loop = loop def restore_capacities_for_all_miners(self): + bt.logging.debug(f"resource is restored. 
self.remain_resources = {self.remain_resources}") self.remain_resources = deepcopy(self.uid_to_capacity) def update_remain_capacity_based_on_new_capacity(self, new_uid_to_capacity): @@ -28,6 +29,8 @@ def update_remain_capacity_based_on_new_capacity(self, new_uid_to_capacity): self.remain_resources[uid][provider][model] = cap else: diff = self.uid_to_capacity[uid][provider][model] - cap + if diff: + bt.logging.debug(f"diff {diff} found in {uid}, {provider}, {model}") self.remain_resources[uid][provider][model] -= diff @error_handler diff --git a/validators/utils.py b/validators/utils.py index cd18d55d..80abd29a 100644 --- a/validators/utils.py +++ b/validators/utils.py @@ -37,6 +37,8 @@ def error_handler(func): async def wrapper(*args, **kwargs): try: result = await func(*args, **kwargs) + except GeneratorExit as err: + bt.logging.error(f"{err}. {traceback.format_exc()}") except Exception as err: bt.logging.error(f"{err}. {traceback.format_exc()}") return None @@ -186,6 +188,7 @@ async def get_stream_result_as_async_gen(task_id): bt.logging.trace("No new results. stop generation.") break bt.logging.trace(f"stream exit. delete old stream from queue.") + await redis_client.close() await redis_client.delete(stream_name) @@ -214,6 +217,7 @@ async def get_stream_result(task_id): break bt.logging.trace(f"stream exit. delete old stream from queue.") await redis_client.delete(stream_name) + await redis_client.close() return full_response, time.time() - start_time diff --git a/validators/validator.py b/validators/validator.py index 2754a151..a545d9b0 100644 --- a/validators/validator.py +++ b/validators/validator.py @@ -127,7 +127,7 @@ def main(): init_wandb(config) loop = asyncio.get_event_loop() - weight_setter = WeightSetter(config=config, cache=cache_service) + weight_setter = WeightSetter(config=config, cache=cache_service, loop=loop) state_path = os.path.join(config.full_path, "state.json") utils.get_state(state_path) try: @@ -137,7 +137,6 @@ def main(): finally: bt.logging.info("stopping axon server.") weight_setter.axon.stop() - weight_setter.redis_client.close() bt.logging.info("updating status before exiting validator") state = utils.get_state(state_path) utils.save_state_to_file(state, state_path) diff --git a/validators/weight_setter.py b/validators/weight_setter.py index a7601824..705ab245 100644 --- a/validators/weight_setter.py +++ b/validators/weight_setter.py @@ -4,7 +4,6 @@ import torch import traceback import time -import aioredis from black.trans import defaultdict from substrateinterface import SubstrateInterface @@ -28,7 +27,7 @@ class WeightSetter: - def __init__(self, config, cache: QueryResponseCache): + def __init__(self, config, cache: QueryResponseCache, loop=None): # Cache object using sqlite3. self.synthetic_task_done = False @@ -71,7 +70,7 @@ def __init__(self, config, cache: QueryResponseCache): # Set up async-related attributes self.lock = asyncio.Lock() - self.loop = asyncio.get_event_loop() + self.loop = loop or asyncio.get_event_loop() # Initialize shared query database self.query_database = [] @@ -126,8 +125,12 @@ async def initialize_uids_and_capacities(self): self.total_scores = {uid: 0.0 for uid in self.available_uid_to_axons.keys()} self.score_counts = {uid: 0 for uid in self.available_uid_to_axons.keys()} - self.task_mgr = TaskMgr(uid_to_capacities=self.uid_to_capacity, dendrite=self.dendrite, - metagraph=self.metagraph, loop=self.loop) + # update task_mgr after synthetic query at the end of iterator. 
+ if self.task_mgr: + self.task_mgr.update_remain_capacity_based_on_new_capacity(self.uid_to_capacity) + else: + self.task_mgr = TaskMgr(uid_to_capacities=self.uid_to_capacity, dendrite=self.dendrite, + metagraph=self.metagraph, loop=self.loop) async def update_and_refresh(self, last_update): bt.logging.info(f"Setting weights, last update {last_update} blocks ago") @@ -136,8 +139,6 @@ async def update_and_refresh(self, last_update): bt.logging.info("Refreshing metagraph...") await self.refresh_metagraph() await self.initialize_uids_and_capacities() - # update task_mgr after synthetic query at the end of iterator. - self.task_mgr.update_remain_capacity_based_on_new_capacity(self.uid_to_capacity) bt.logging.info("Metagraph refreshed.") async def perform_synthetic_queries(self): diff --git a/validators/workers.py b/validators/workers.py index 067e65bb..457c9184 100644 --- a/validators/workers.py +++ b/validators/workers.py @@ -1,6 +1,6 @@ import bittensor as bt from cortext import REDIS_RESULT_STREAM, REDIS_RESULT -from validators.utils import get_redis_client +from validators.utils import get_redis_client, error_handler class Worker: @@ -10,26 +10,25 @@ def __init__(self, synapse, dendrite, axon): self.dendrite = dendrite self.axon = axon + @error_handler async def run_task(self): # Pull task from worker-specific queue redis_client = await get_redis_client() task_id = self.synapse.task_id bt.logging.trace(f"Worker {task_id} received task: {self.synapse}") - try: - await self.dendrite.aclose_session() - responses = await self.dendrite( - axons=[self.axon], - synapse=self.synapse, - deserialize=self.synapse.deserialize_flag, - timeout=self.synapse.timeout, - streaming=self.synapse.streaming, - ) - except Exception as err: - bt.logging.exception(err) + + await self.dendrite.aclose_session() + responses = await self.dendrite( + axons=[self.axon], + synapse=self.synapse, + deserialize=self.synapse.deserialize_flag, + timeout=self.synapse.timeout, + streaming=self.synapse.streaming, + ) + if self.synapse.streaming: + async for chunk in responses[0]: + if isinstance(chunk, str): + await redis_client.xadd(REDIS_RESULT_STREAM + f"{task_id}", {"chunk": chunk}) else: - if self.synapse.streaming: - async for chunk in responses[0]: - if isinstance(chunk, str): - await redis_client.xadd(REDIS_RESULT_STREAM + f"{task_id}", {"chunk": chunk}) - else: - await redis_client.rpush(REDIS_RESULT, responses[0]) + await redis_client.rpush(REDIS_RESULT, responses[0]) + await redis_client.close() From 1cfceaa01c43e03e4b7b0b13893750bb739effad Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Fri, 27 Sep 2024 04:59:14 +0000 Subject: [PATCH 38/80] remove stricter reqs --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 2adae9f6..d5e23386 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,10 +10,10 @@ transformers==4.* wandb anthropic==0.19.2 stability-sdk -boto3==1.34.131 +boto3 anthropic_bedrock pyOpenSSL==24.* google-generativeai groq==0.5.0 -aioboto3==13.1.1 +aioboto3 aioredis==2.0.1 \ No newline at end of file From ed31a79e73c44a90a63b4cbc7c4371d4bb82a5d1 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Fri, 27 Sep 2024 05:07:51 +0000 Subject: [PATCH 39/80] fix --- cache.db | Bin 0 -> 16384 bytes requirements.txt | 2 +- validators/utils.py | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) create mode 100644 cache.db diff --git a/cache.db b/cache.db new file mode 100644 index 
0000000000000000000000000000000000000000..b0c0413f7cb88861ade9d8dbe293f358da3ff39b GIT binary patch literal 16384 [binary patch data omitted] diff --git a/validators/utils.py b/validators/utils.py index 80abd29a..7d754a34 100644 --- a/validators/utils.py +++ b/validators/utils.py @@ -2,7 +2,7 @@ import time import aiohttp import asyncio -import aioredis +import redis.asyncio as aioredis import base64 import hashlib import inspect From 9744760654a1c6e75e6a7c77514b7e8aedf521ae Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Fri, 27 Sep 2024 05:14:58 +0000 Subject: [PATCH 40/80] don't init wallet twice --- validators/validator.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/validators/validator.py b/validators/validator.py index a545d9b0..a99587e2 100644 --- a/validators/validator.py +++ b/validators/validator.py @@ -91,8 +91,7 @@ def init_wandb(config): if not config.wandb_on: return - wallet = bt.wallet(name=config.wallet.name, hotkey=config.wallet.hotkey) - run_name = f"validator-{wallet.hotkey.ss58_address}-{cortext.__version__}" + run_name = f"validator-{config.wallet.hotkey.ss58_address}-{cortext.__version__}" config.run_name = run_name config.version = cortext.__version__ config.type = "validator" @@ -106,7 +105,7 @@ def init_wandb(config): reinit=True ) - signature = wallet.hotkey.sign(run.id.encode()).hex() + signature = config.wallet.hotkey.sign(run.id.encode()).hex() config.signature = signature wandb.config.update(config.__dict__, allow_val_change=True) From 02f68bb2253815730e14ade2012e908bc438f4d1 Mon Sep 17 00:00:00 2001 From: surcyf123 Date: Fri, 27 Sep 2024 05:51:25 +0000 Subject: [PATCH 41/80] add .env to .gitignore --- .gitignore | 3 ++- validators/validator.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 726492f6..6572d92f 100644 --- a/.gitignore +++ b/.gitignore @@ -14,4 +14,5 @@ validators/.ipynb_checkpoints/ **/validator.ipynb **/.env **/Cortex.t.egg-info -**/test.ipynb \ No newline at end of file +**/test.ipynb +.env \ No newline at end of file diff --git a/validators/validator.py b/validators/validator.py index a99587e2..8ead33d9 100644 --- a/validators/validator.py +++ b/validators/validator.py @@ -55,7 +55,7 @@ def get(self, key, default=None): def parse_arguments(): parser = argparse.ArgumentParser(description="Validator Configuration") - parser.add_argument("--subtensor.chain_endpoint", type=str, default="wss://entrypoint-finney.opentensor.ai:443") + parser.add_argument("--subtensor.chain_endpoint", type=str, default="wss://entrypoint-finney.opentensor.ai:443") #for testnet: wss://test.finney.opentensor.ai:443 parser.add_argument("--wallet.name", type=str, default="default") parser.add_argument("--wallet.hotkey", type=str, default="default") parser.add_argument("--netuid", type=int, default=18) From bba59ac46d79875b62d7eec3b11c005e6b7841aa Mon Sep 17 00:00:00 2001 From: surcyf123 Date: Fri, 27 Sep 2024 05:54:46 +0000 Subject: [PATCH 42/80] fix miner logging --- miner/config.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/miner/config.py b/miner/config.py index 7cdcb40f..68478ec1 100644 --- a/miner/config.py +++ b/miner/config.py @@ -36,7 +36,8 @@ def __init__(self): self.BT_SUBTENSOR_NETWORK = 'test' if self.ENV == 'test' 
else 'finney' self.WANDB_OFF = False if self.ENV == 'prod' else True - self.LOGGING_TRACE = False if self.ENV == 'prod' else True + # still can use the --logging.debug and --logging.trace to turn on logging + self.LOGGING_TRACE = False # if self.ENV == 'prod' else True self.BLACKLIST_AMT = 5000 if self.ENV == 'prod' else 0 self.BLOCKS_PER_EPOCH = int(os.getenv('BLOCKS_PER_EPOCH', 100)) self.WAIT_NEXT_BLOCK_TIME = int(os.getenv('WAIT_NEXT_BLOCK_TIME', 1)) From 465b4e3f8f47c69f8442fbbcfa1e65de5b2137a3 Mon Sep 17 00:00:00 2001 From: surcyf123 Date: Fri, 27 Sep 2024 05:59:50 +0000 Subject: [PATCH 43/80] add block to logging --- validators/weight_setter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validators/weight_setter.py b/validators/weight_setter.py index 705ab245..c4fdc227 100644 --- a/validators/weight_setter.py +++ b/validators/weight_setter.py @@ -145,7 +145,7 @@ async def perform_synthetic_queries(self): while True: # wait for MIN_REQUEST_PERIOD minutes. await asyncio.sleep(cortext.MIN_REQUEST_PERIOD * 60) - bt.logging.info(f"start processing synthetic queries {time.time()}") + bt.logging.info(f"start processing synthetic queries at block {self.get_current_block()} at time {time.time()}") start_time = time.time() # check available bandwidth and send synthetic requests to all miners. query_tasks = [] From fc9060d17828893d6a2c51be660074aab2e7456a Mon Sep 17 00:00:00 2001 From: surcyf123 Date: Fri, 27 Sep 2024 06:02:42 +0000 Subject: [PATCH 44/80] start with only 3 models --- cortext/constants.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/cortext/constants.py b/cortext/constants.py index 9f71ff82..91a39757 100644 --- a/cortext/constants.py +++ b/cortext/constants.py @@ -11,39 +11,39 @@ # from https://openai.com/api/pricing/ "OpenAI": { "gpt-4o": 15.00, - "gpt-4o-mini": 0.600, - "gpt-3.5-turbo": 2.00, - "o1-preview": 60.00, - "o1-mini": 12.00, + # "gpt-4o-mini": 0.600, + # "gpt-3.5-turbo": 2.00, + # "o1-preview": 60.00, + # "o1-mini": 12.00, }, # from https://ai.google.dev/pricing - "Gemini": { - "gemini-1.5-flash": 0.30, - "gemini-1.5-pro": 10.50, - }, + # "Gemini": { + # "gemini-1.5-flash": 0.30, + # "gemini-1.5-pro": 10.50, + # }, # "Anthropic": { "claude-3-5-sonnet-20240620": 15.00, # "claude-3-opus-20240229": 75, - "claude-3-haiku-20240307": 1.25, + # "claude-3-haiku-20240307": 1.25, }, # model IDs from https://console.groq.com/docs/tool-use?hss_channel=tw-842860575289819136 # prices not available yet, default to bedrock pricing # free tier: 30 rpm "Groq": { - "gemma2-9b-it": 0.22, - "llama-3.1-8b-instant": 0.22, + # "gemma2-9b-it": 0.22, + # "llama-3.1-8b-instant": 0.22, "llama-3.1-70b-versatile": .99, # "llama-3.1-405b-reasoning": 16, - "mixtral-8x7b-32768": 0.7, + # "mixtral-8x7b-32768": 0.7, }, # from https://aws.amazon.com/bedrock/pricing/ # model IDs from https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html#model-ids-arns "Bedrock": { - "mistral.mixtral-8x7b-instruct-v0:1": 0.7, - "mistral.mistral-large-2402-v1:0": 24, - "meta.llama3-1-8b-instruct-v1:0": 0.22, - "meta.llama3-1-70b-instruct-v1:0": 0.99, + # "mistral.mixtral-8x7b-instruct-v0:1": 0.7, + # "mistral.mistral-large-2402-v1:0": 24, + # "meta.llama3-1-8b-instruct-v1:0": 0.22, + # "meta.llama3-1-70b-instruct-v1:0": 0.99, # "meta.llama3-1-405b-instruct-v1:0": 16, } } From c97cf8bfb91d677fd8d29b479416694263fc75c2 Mon Sep 17 00:00:00 2001 From: surcyf123 Date: Fri, 27 Sep 2024 06:13:18 +0000 Subject: [PATCH 45/80] make example 
bandwidth --- cortext/constants.py | 40 ++++++++++++++++++------------------- validators/weight_setter.py | 2 +- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/cortext/constants.py b/cortext/constants.py index 91a39757..85e71be8 100644 --- a/cortext/constants.py +++ b/cortext/constants.py @@ -50,40 +50,40 @@ bandwidth_to_model = { "OpenAI": { - "gpt-4o": 1, - "gpt-4o-mini": 1, - "gpt-3.5-turbo": 1, - "o1-preview": 1, - "o1-mini": 1, + "gpt-4o": 3, + # "gpt-4o-mini": 1, + # "gpt-3.5-turbo": 1, + # "o1-preview": 1, + # "o1-mini": 1, }, # from https://ai.google.dev/pricing - "Gemini": { - "gemini-1.5-flash": 1, - "gemini-1.5-pro": 1, - }, + # "Gemini": { + # "gemini-1.5-flash": 1, + # "gemini-1.5-pro": 1, + # }, # "Anthropic": { - "claude-3-5-sonnet-20240620": 1, + "claude-3-5-sonnet-20240620": 2, # "claude-3-opus-20240229": 1, - "claude-3-haiku-20240307": 1, + # "claude-3-haiku-20240307": 1, }, # model IDs from https://console.groq.com/docs/tool-use?hss_channel=tw-842860575289819136 # prices not available yet, default to bedrock pricing # free tier: 30 rpm "Groq": { - "gemma2-9b-it": 1, - "llama-3.1-8b-instant": 1, + # "gemma2-9b-it": 1, + # "llama-3.1-8b-instant": 1, "llama-3.1-70b-versatile": 1, # "llama-3.1-405b-reasoning": 16, - "mixtral-8x7b-32768": 1, + # "mixtral-8x7b-32768": 1, }, # from https://aws.amazon.com/bedrock/pricing/ # model IDs from https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html#model-ids-arns - "Bedrock": { - "mistral.mixtral-8x7b-instruct-v0:1": 1, - "mistral.mistral-large-2402-v1:0": 1, - "meta.llama3-1-8b-instruct-v1:0": 1, - "meta.llama3-1-70b-instruct-v1:0": 1, + # "Bedrock": { + # "mistral.mixtral-8x7b-instruct-v0:1": 1, + # "mistral.mistral-large-2402-v1:0": 1, + # "meta.llama3-1-8b-instruct-v1:0": 1, + # "meta.llama3-1-70b-instruct-v1:0": 1, # "meta.llama3-1-405b-instruct-v1:0": 16, - } + # } } diff --git a/validators/weight_setter.py b/validators/weight_setter.py index c4fdc227..6b6a1438 100644 --- a/validators/weight_setter.py +++ b/validators/weight_setter.py @@ -52,7 +52,7 @@ def __init__(self, config, cache: QueryResponseCache, loop=None): # Scoring and querying parameters self.MIN_SCORED_QUERIES = 1 # Minimum number of times each UID should be scored per epoch - self.scoring_percent = 1 # Percentage of total queries that will be scored + # self.scoring_percent = 1 # Percentage of total queries that will be scored self.TOTAL_QUERIES_PER_UID = int(self.MIN_SCORED_QUERIES / self.scoring_percent) self.max_score_cnt_per_model = 1 bt.logging.info(f"Each UID will receive {self.TOTAL_QUERIES_PER_UID} total queries, " f"with {self.MIN_SCORED_QUERIES} of them being scored.") From f14527830dc93e318a4de379414fb4ae3a208522 Mon Sep 17 00:00:00 2001 From: surcyf123 Date: Fri, 27 Sep 2024 06:18:18 +0000 Subject: [PATCH 46/80] fix variable names --- cortext/__init__.py | 2 +- cortext/constants.py | 2 +- validators/services/capacity.py | 4 ++-- validators/weight_setter.py | 4 ---- 4 files changed, 4 insertions(+), 8 deletions(-) diff --git a/cortext/__init__.py b/cortext/__init__.py index 63e47cff..6cf146a3 100644 --- a/cortext/__init__.py +++ b/cortext/__init__.py @@ -52,7 +52,7 @@ IMAGE_BLACKLIST_STAKE = 5000 EMBEDDING_BLACKLIST_STAKE = 5000 ISALIVE_BLACKLIST_STAKE = min(PROMPT_BLACKLIST_STAKE, IMAGE_BLACKLIST_STAKE, EMBEDDING_BLACKLIST_STAKE) -MIN_REQUEST_PERIOD = 7.2 +REQUEST_PERIOD = .33 MAX_REQUESTS = 20 # must have the test_key whitelisted to avoid a global blacklist testnet_key = ["5EhEZN6soubtKJm8RN7ANx9FGZ2JezxBUFxr45cdsHtDp3Uk"] diff --git a/cortext/constants.py 
b/cortext/constants.py index 85e71be8..6abb4097 100644 --- a/cortext/constants.py +++ b/cortext/constants.py @@ -50,7 +50,7 @@ bandwidth_to_model = { "OpenAI": { - "gpt-4o": 3, + "gpt-4o": 2, # "gpt-4o-mini": 1, # "gpt-3.5-turbo": 1, # "o1-preview": 1, diff --git a/validators/services/capacity.py b/validators/services/capacity.py index 2231f406..318b6e70 100644 --- a/validators/services/capacity.py +++ b/validators/services/capacity.py @@ -3,7 +3,7 @@ from typing import List from cortext.protocol import Bandwidth -from cortext import MIN_REQUEST_PERIOD +from cortext import REQUEST_PERIOD import bittensor as bt @@ -14,7 +14,7 @@ def __init__(self, metagraph, dendrite): self.timeout = 4 self.uid_to_capacity = {} self.remain_uid_to_capacity = {} - self.epoch_len = MIN_REQUEST_PERIOD + self.epoch_len = REQUEST_PERIOD async def query_capacity_to_miners(self, available_uids): capacity_query_tasks = [] diff --git a/validators/weight_setter.py b/validators/weight_setter.py index 6b6a1438..ea789632 100644 --- a/validators/weight_setter.py +++ b/validators/weight_setter.py @@ -52,11 +52,7 @@ def __init__(self, config, cache: QueryResponseCache, loop=None): # Scoring and querying parameters self.MIN_SCORED_QUERIES = 1 # Minimum number of times each UID should be scored per epoch - # self.scoring_percent = 1 # Percentage of total queries that will be scored - self.TOTAL_QUERIES_PER_UID = int(self.MIN_SCORED_QUERIES / self.scoring_percent) self.max_score_cnt_per_model = 1 - bt.logging.info(f"Each UID will receive {self.TOTAL_QUERIES_PER_UID} total queries, " - f"with {self.MIN_SCORED_QUERIES} of them being scored.") # Initialize scores and counts self.total_scores = {} From b1e0d82be82e2a8dfb65cf11fb89450b0a782e63 Mon Sep 17 00:00:00 2001 From: surcyf123 Date: Fri, 27 Sep 2024 06:18:39 +0000 Subject: [PATCH 47/80] fix variable names --- validators/weight_setter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validators/weight_setter.py b/validators/weight_setter.py index ea789632..9adf4550 100644 --- a/validators/weight_setter.py +++ b/validators/weight_setter.py @@ -140,7 +140,7 @@ async def update_and_refresh(self, last_update): async def perform_synthetic_queries(self): while True: # wait for MIN_REQUEST_PERIOD minutes. - await asyncio.sleep(cortext.MIN_REQUEST_PERIOD * 60) + await asyncio.sleep(cortext.REQUEST_PERIOD * 60) bt.logging.info(f"start processing synthetic queries at block {self.get_current_block()} at time {time.time()}") start_time = time.time() # check available bandwidth and send synthetic requests to all miners. 
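Taken together, PATCH 44 through PATCH 47 turn the synthetic-query scheduler into a capacity-driven fan-out: every `REQUEST_PERIOD` window, each miner should receive one query per unit of bandwidth it advertised for each (provider, model) pair. A minimal sketch of that loop, assuming a `send_query(uid, provider, model)` coroutine as a hypothetical stand-in for the real dendrite call:

```python
import asyncio

REQUEST_PERIOD = .33  # minutes, matching the constant renamed in PATCH 46

async def synthetic_query_loop(uid_to_capacity: dict, send_query) -> None:
    # uid_to_capacity maps uid -> provider -> model -> bandwidth (int)
    while True:
        await asyncio.sleep(REQUEST_PERIOD * 60)
        tasks = []
        for uid, capacity in uid_to_capacity.items():
            for provider, model_to_cap in capacity.items():
                for model, bandwidth in model_to_cap.items():
                    # build one distinct coroutine per unit of declared bandwidth
                    tasks += [send_query(uid, provider, model) for _ in range(bandwidth)]
        await asyncio.gather(*tasks)
```

Note that the sketch creates a distinct coroutine per bandwidth slot; repeating a single coroutine object, as the earlier `query_tasks += [query_task] * bandwidth` hunk does, makes `asyncio.gather` await the same object more than once and can raise `RuntimeError: cannot reuse already awaited coroutine` whenever bandwidth exceeds 1.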
From 3fd1bae675d67e35d4d3900f5752f45a97ec8484 Mon Sep 17 00:00:00 2001 From: acer-king Date: Fri, 27 Sep 2024 00:19:46 -0700 Subject: [PATCH 48/80] code clean up --- validators/utils.py | 87 ++++----------------------- validators/weight_setter.py | 116 +++++------------------------------- validators/workers.py | 8 +-- 3 files changed, 30 insertions(+), 181 deletions(-) diff --git a/validators/utils.py b/validators/utils.py index 7d754a34..57940ba6 100644 --- a/validators/utils.py +++ b/validators/utils.py @@ -2,7 +2,7 @@ import time import aiohttp import asyncio -import redis.asyncio as aioredis +import redis import base64 import hashlib import inspect @@ -70,71 +70,6 @@ async def handle_response_stream(responses) -> tuple[str, str]: return full_response -def handle_response(func): - @wraps(func) - async def wrapper(*args, **kwargs): - try: - start_time = time.time() - response = await func(*args, **kwargs) - if inspect.isasyncgen(response): - result = await handle_response_stream(response) - return result, time.time() - start_time - elif isinstance(response, ImageResponse): - response.process_time = time.time() - start_time - return response - else: - bt.logging.error(f"Not found response type: {type(response)}") - return None - except Exception as err: - bt.logging.exception(f"Exception during query for uid {args[1]}, {err}") - return None - - return wrapper - - -def apply_for_time_penalty_to_uid_scores(func): - @wraps(func) - async def wrapper(*args, **kwargs): - uid_to_scores, scores, resps = await func(*args, **kwargs) - for uid, query_resp in resps: - resp_synapse = query_resp.get("response") - if isinstance(resp_synapse, ImageResponse): - # apply penalty for image task. - score = uid_to_scores[uid] - factor = 64 - max_penalty = 0.5 - if resp_synapse.process_time < 5: - bt.logging.trace(f"process time is less than 5 sec. so don't apply penalty for uid {uid}") - else: - penalty = min(max_penalty * pow(resp_synapse.process_time, 1.5) / pow(factor, 1.5), max_penalty) - bt.logging.trace(f"penalty {penalty} is applied to miner {uid} " - f"for process time {resp_synapse.process_time}") - score -= penalty - uid_to_scores[uid] = max(score, 0) - elif isinstance(resp_synapse, tuple): - # apply penalty for streaming task. 
- resp_str, process_time = resp_synapse - total_work_done = len(resp_str) - chars_per_sec = total_work_done / process_time - bt.logging.debug(f"speed of streaming is {chars_per_sec} chars per second") - - base_speed = 50 - if chars_per_sec >= base_speed: - bt.logging.trace(f"don't apply penalty for this uid {uid}") - else: - max_penalty = 0.5 - penalty = min((base_speed - chars_per_sec) / base_speed, max_penalty) # max penalty is 0.5 - new_score = max(uid_to_scores[uid] - penalty, 0) - bt.logging.debug(f"penalty is {penalty}, new_score is {new_score} for uid {uid}") - uid_to_scores[uid] = new_score - else: - pass - - return uid_to_scores, scores, resps - - return wrapper - - def save_answer_to_cache(func): @wraps(func) async def wrapper(*args, **kwargs): @@ -170,11 +105,11 @@ async def get_stream_result_as_async_gen(task_id): last_id = '0' # Start reading from the beginning of the stream bt.logging.trace(f"Waiting for results of task {task_id}...") stream_name = REDIS_RESULT_STREAM + f"{task_id}" - redis_client = await get_redis_client() + redis_client = get_redis_client() while True: # Read from the Redis stream - result_entries = await redis_client.xread({stream_name: last_id}, block=5000) + result_entries = redis_client.xread({stream_name: last_id}, block=5000) result_entries = result_entries or [] if result_entries: for entry in result_entries: @@ -188,13 +123,13 @@ async def get_stream_result_as_async_gen(task_id): bt.logging.trace("No new results. stop generation.") break bt.logging.trace(f"stream exit. delete old stream from queue.") - await redis_client.close() - await redis_client.delete(stream_name) + redis_client.delete(stream_name) + redis_client.close() @error_handler async def get_stream_result(task_id): - redis_client = await get_redis_client() + redis_client = get_redis_client() last_id = '0' # Start reading from the beginning of the stream bt.logging.trace(f"Waiting for results of task {task_id}...") stream_name = REDIS_RESULT_STREAM + f"{task_id}" @@ -202,7 +137,7 @@ async def get_stream_result(task_id): start_time = time.time() while True: # Read from the Redis stream - result_entries = await redis_client.xread({stream_name: last_id}, block=5000) + result_entries = redis_client.xread({stream_name: last_id}, block=5000) result_entries = result_entries or [] if result_entries: for entry in result_entries: @@ -216,8 +151,8 @@ async def get_stream_result(task_id): bt.logging.trace("No new results. stop generation.") break bt.logging.trace(f"stream exit. 
delete old stream from queue.") - await redis_client.delete(stream_name) - await redis_client.close() + redis_client.delete(stream_name) + redis_client.close() return full_response, time.time() - start_time @@ -236,6 +171,6 @@ def find_positive_values(data: dict): return positive_values -async def get_redis_client(): - redis_client = await aioredis.from_url("redis://localhost", encoding="utf-8", decode_responses=True) +def get_redis_client(): + redis_client = redis.from_url("redis://localhost", encoding="utf-8", decode_responses=True) return redis_client \ No newline at end of file diff --git a/validators/weight_setter.py b/validators/weight_setter.py index 9adf4550..897200a9 100644 --- a/validators/weight_setter.py +++ b/validators/weight_setter.py @@ -19,7 +19,7 @@ from cortext.metaclasses import ValidatorRegistryMeta from validators.services import CapacityService, BaseValidator, TextValidator, ImageValidator from validators.services.cache import QueryResponseCache -from validators.utils import handle_response, error_handler, get_stream_result_as_async_gen, get_stream_result +from validators.utils import error_handler, get_stream_result_as_async_gen, get_stream_result from validators.task_manager import TaskMgr from validators.models.enum import QueryType @@ -86,28 +86,6 @@ def __init__(self, config, cache: QueryResponseCache, loop=None): async def run_sync_in_async(self, fn): return await self.loop.run_in_executor(self.thread_executor, fn) - def get_current_block(self): - return self.node.query("System", "Number", []).value - - def get_weights_rate_limit(self): - return self.node.query("SubtensorModule", "WeightsSetRateLimit", [self.netuid]).value - - def get_last_update(self, block): - try: - last_update_blocks = block - self.node.query("SubtensorModule", "LastUpdate", [self.netuid]).value[ - self.my_uid] - except Exception as err: - bt.logging.error(f"Error getting last update: {traceback.format_exc()}") - bt.logging.exception(err) - # Means that the validator is not registered yet. 
- last_update_blocks = 1000 - - bt.logging.trace(f"Last set weights successfully {last_update_blocks} blocks ago") - return last_update_blocks - - def get_blocks_til_epoch(self, block): - return self.tempo - (block + 19) % (self.tempo + 1) - async def refresh_metagraph(self): await self.run_sync_in_async(lambda: self.metagraph.sync()) @@ -128,10 +106,8 @@ async def initialize_uids_and_capacities(self): self.task_mgr = TaskMgr(uid_to_capacities=self.uid_to_capacity, dendrite=self.dendrite, metagraph=self.metagraph, loop=self.loop) - async def update_and_refresh(self, last_update): - bt.logging.info(f"Setting weights, last update {last_update} blocks ago") + async def update_and_refresh(self): await self.update_weights() - bt.logging.info("Refreshing metagraph...") await self.refresh_metagraph() await self.initialize_uids_and_capacities() @@ -202,68 +178,6 @@ def choose_validator_from_model(self, model): # else: # return image_validator - def should_i_score(self): - # Randomly decide whether to score this query based on scoring_percent - return random.random() < self.scoring_percent - - async def perform_queries(self, selected_validator, uids_to_query): - query_responses = [] - response_tasks = [] - query_tasks = [] - provider_to_models = selected_validator.get_provider_to_models() - uids_to_query_expand = [] - for provider, model in provider_to_models: - for uid in uids_to_query: - band_width = self.uid_to_capacity.get(uid).get(f"{provider}").get(f"{model}") - for _ in range(band_width): - query_task = selected_validator.create_query(uid, provider, model) - query_tasks.append(query_task) - uids_to_query_expand.append(uid) - - queries = await asyncio.gather(*query_tasks) - for uid, query in zip(uids_to_query_expand, queries): - response_tasks.append(self.query_miner(uid, query)) - - responses = await asyncio.gather(*response_tasks) - for uid, query_syn, response in zip(uids_to_query_expand, queries, responses): - query_responses.append((uid, {'query': query_syn, 'response': response})) - return query_responses - - @handle_response - async def query_miner(self, uid, synapse): - axon = self.metagraph.axons[uid] - - streaming = False - if isinstance(synapse, bt.StreamingSynapse): - streaming = True - - responses = await self.dendrite( - axons=[axon], - synapse=synapse, - deserialize=False, - timeout=synapse.timeout, - streaming=streaming, - ) - # Handle the response appropriately - return responses[0] # Assuming responses is a list - - def select_validator(self) -> BaseValidator: - rand = random.random() - text_validator = ValidatorRegistryMeta.get_class('TextValidator')(config=self.config, metagraph=self.metagraph) - image_validator = ValidatorRegistryMeta.get_class('ImageValidator')(config=self.config, - metagraph=self.metagraph) - if rand > self.config.image_validator_probability: - return text_validator - else: - return image_validator - - def get_validators(self) -> List[BaseValidator]: - validators = [] - all_classes = ValidatorRegistryMeta.all_classes() - for class_name, class_ref in all_classes.items(): - validator = ValidatorRegistryMeta.get_class(class_name)(config=self.config, metagraph=self.metagraph) - validators.append(validator) - return validators async def get_capacities_for_uids(self, uids): capacity_service = CapacityService(metagraph=self.metagraph, dendrite=self.dendrite) @@ -521,8 +435,6 @@ def get_scoring_tasks_from_query_responses(self, queries_to_process): async def process_queries_from_database(self): while True: - current_block = self.get_current_block() - last_update 
= self.get_last_update(current_block) await asyncio.sleep(1) # Adjust the sleep time as needed # accumulate all query results for MIN_REQUEST_PERIOD @@ -549,7 +461,7 @@ async def process_queries_from_database(self): self.total_scores[uid] += score self.score_counts[uid] += 1 bt.logging.info(f"current total score are {self.total_scores}") - await self.update_and_refresh(last_update) + await self.update_and_refresh() @property def batch_list_of_all_uids(self): @@ -564,15 +476,17 @@ def batch_list_of_all_uids(self): async def process_queries_from_cache_database(self): # await self.initialize_uids_and_capacities() tasks = [] - for vali in self.get_validators(): - for provider, model in vali.get_provider_to_models(): - questions_answers: List[Tuple[str, str]] = self.cache.get_all_question_to_answers(provider, model) - if not questions_answers: - continue - # select one of questions_answers - query, answer = random.choice(questions_answers) - query_syn = vali.get_synapse_from_json(query) - tasks.append(self.score_miners_based_cached_answer(vali, query_syn, answer)) + for uid in self.uid_to_capacity.keys(): + for provider, model_to_bandwidth in self.uid_to_capacity.get(uid).items(): + for model, bandwidth in model_to_bandwidth.items(): + vali = self.choose_validator_from_model(model) + questions_answers: List[Tuple[str, str]] = self.cache.get_all_question_to_answers(provider, model) + if not questions_answers: + continue + # select one of questions_answers + query, answer = random.choice(questions_answers) + query_syn = vali.get_synapse_from_json(query) + tasks.append(self.score_miners_based_cached_answer(vali, query_syn, answer)) # process tasks in batch_size to not exceed max request per 2min. batched_task_list = [] @@ -582,7 +496,7 @@ async def process_queries_from_cache_database(self): start_time = time.time() await asyncio.gather(*batch_tasks) passed_time = time.time() - start_time - sleep_time = max(cortext.MIN_REQUEST_PERIOD * 60 - passed_time, 1) + sleep_time = max(cortext.REQUEST_PERIOD * 60 - passed_time, 1) bt.logging.debug(f"wait time {sleep_time} to not exceed max_request {cortext.MAX_REQUESTS} in 2min") await asyncio.sleep(sleep_time) diff --git a/validators/workers.py b/validators/workers.py index 457c9184..0f4b3377 100644 --- a/validators/workers.py +++ b/validators/workers.py @@ -13,7 +13,7 @@ def __init__(self, synapse, dendrite, axon): @error_handler async def run_task(self): # Pull task from worker-specific queue - redis_client = await get_redis_client() + redis_client = get_redis_client() task_id = self.synapse.task_id bt.logging.trace(f"Worker {task_id} received task: {self.synapse}") @@ -28,7 +28,7 @@ async def run_task(self): if self.synapse.streaming: async for chunk in responses[0]: if isinstance(chunk, str): - await redis_client.xadd(REDIS_RESULT_STREAM + f"{task_id}", {"chunk": chunk}) + redis_client.xadd(REDIS_RESULT_STREAM + f"{task_id}", {"chunk": chunk}) else: - await redis_client.rpush(REDIS_RESULT, responses[0]) - await redis_client.close() + redis_client.rpush(REDIS_RESULT, responses[0]) + redis_client.close() From 97fc046d49cdb1264a23fe6e9f3d4cdf56f1fdfb Mon Sep 17 00:00:00 2001 From: acer-king Date: Fri, 27 Sep 2024 02:51:35 -0700 Subject: [PATCH 49/80] change aiordis to hiredis to boost performance --- validators/utils.py | 47 +++++++++++++++++++++++-------------- validators/weight_setter.py | 19 ++++----------- validators/workers.py | 4 +++- 3 files changed, 38 insertions(+), 32 deletions(-) diff --git a/validators/utils.py b/validators/utils.py index 
57940ba6..85d362dc 100644 --- a/validators/utils.py +++ b/validators/utils.py @@ -101,28 +101,40 @@ def create_hash_value(input_string): @error_handler -async def get_stream_result_as_async_gen(task_id): +async def get_result_entry_from_redis(redis_client, stream_name, last_id, max_try_cnt): + result_entries = None + while max_try_cnt: + result_entries = redis_client.xread({stream_name: last_id}, block=100) + await asyncio.sleep(0.1) + if result_entries: + break + else: + max_try_cnt -= 1 + return result_entries + + +@error_handler +async def get_stream_as_async_gen(task_id): last_id = '0' # Start reading from the beginning of the stream bt.logging.trace(f"Waiting for results of task {task_id}...") stream_name = REDIS_RESULT_STREAM + f"{task_id}" redis_client = get_redis_client() + full_response = '' + result_entries = await get_result_entry_from_redis(redis_client, stream_name, last_id, max_try_cnt=50) - while True: + while result_entries: # Read from the Redis stream - result_entries = redis_client.xread({stream_name: last_id}, block=5000) - result_entries = result_entries or [] - if result_entries: - for entry in result_entries: - stream_name, results = entry - for result_id, data in results: - result_chunk = data['chunk'] - last_id = result_id - bt.logging.trace(result_chunk) - yield result_chunk - else: - bt.logging.trace("No new results. stop generation.") - break - bt.logging.trace(f"stream exit. delete old stream from queue.") + for entry in result_entries: + stream_name, results = entry + for result_id, data in results: + result_chunk = data['chunk'] + last_id = result_id + bt.logging.trace(result_chunk) + full_response += result_chunk + yield result_chunk + result_entries = await get_result_entry_from_redis(redis_client, stream_name, last_id, max_try_cnt=50) + + bt.logging.debug(f"stream exit. delete old stream from queue. 
{full_response}") redis_client.delete(stream_name) redis_client.close() @@ -171,6 +183,7 @@ def find_positive_values(data: dict): return positive_values + def get_redis_client(): redis_client = redis.from_url("redis://localhost", encoding="utf-8", decode_responses=True) - return redis_client \ No newline at end of file + return redis_client diff --git a/validators/weight_setter.py b/validators/weight_setter.py index 897200a9..74e60bb5 100644 --- a/validators/weight_setter.py +++ b/validators/weight_setter.py @@ -2,7 +2,6 @@ import concurrent import random import torch -import traceback import time from black.trans import defaultdict @@ -19,7 +18,7 @@ from cortext.metaclasses import ValidatorRegistryMeta from validators.services import CapacityService, BaseValidator, TextValidator, ImageValidator from validators.services.cache import QueryResponseCache -from validators.utils import error_handler, get_stream_result_as_async_gen, get_stream_result +from validators.utils import error_handler, get_stream_as_async_gen, get_stream_result from validators.task_manager import TaskMgr from validators.models.enum import QueryType @@ -35,7 +34,6 @@ def __init__(self, config, cache: QueryResponseCache, loop=None): self.in_cache_processing = False self.batch_size = config.max_miners_cnt self.cache = cache - self.start_time = time.time() self.uid_to_capacity = {} self.available_uid_to_axons = {} @@ -51,12 +49,11 @@ def __init__(self, config, cache: QueryResponseCache, loop=None): bt.logging.info(f"Running validator on subnet: {self.netuid} with uid: {self.my_uid}") # Scoring and querying parameters - self.MIN_SCORED_QUERIES = 1 # Minimum number of times each UID should be scored per epoch self.max_score_cnt_per_model = 1 # Initialize scores and counts self.total_scores = {} - self.score_counts = {} # Number of times a UID has been scored + self.score_counts = {} self.moving_average_scores = None # Set up axon and dendrite @@ -71,10 +68,6 @@ def __init__(self, config, cache: QueryResponseCache, loop=None): # Initialize shared query database self.query_database = [] - # Get network tempo - self.tempo = self.subtensor.tempo(self.netuid) - self.weights_rate_limit = self.get_weights_rate_limit() - # initialize uid and capacities. asyncio.run(self.initialize_uids_and_capacities()) # Set up async tasks @@ -117,7 +110,7 @@ async def perform_synthetic_queries(self): while True: # wait for MIN_REQUEST_PERIOD minutes. await asyncio.sleep(cortext.REQUEST_PERIOD * 60) - bt.logging.info(f"start processing synthetic queries at block {self.get_current_block()} at time {time.time()}") + bt.logging.info(f"start processing synthetic queries at block {self.metagraph.block} at time {time.time()}") start_time = time.time() # check available bandwidth and send synthetic requests to all miners. 
query_tasks = [] @@ -178,7 +171,6 @@ def choose_validator_from_model(self, model): # else: # return image_validator - async def get_capacities_for_uids(self, uids): capacity_service = CapacityService(metagraph=self.metagraph, dendrite=self.dendrite) uid_to_capacity = await capacity_service.query_capacity_to_miners(uids) @@ -353,8 +345,7 @@ async def _prompt(query_synapse: StreamPrompting, send: Send): bt.logging.trace(f"task is created and task_id is {task_id}") response_text = '' - - async for chunk in get_stream_result_as_async_gen(task_id=task_id): + async for chunk in get_stream_as_async_gen(task_id): if isinstance(chunk, str): await send({ "type": "http.response.body", @@ -362,7 +353,7 @@ async def _prompt(query_synapse: StreamPrompting, send: Send): "more_body": True, }) response_text += chunk - bt.logging.trace(response_text) + bt.logging.trace(f"received total response is :{response_text}") await send({"type": "http.response.body", "body": b'', "more_body": False}) diff --git a/validators/workers.py b/validators/workers.py index 0f4b3377..556338a2 100644 --- a/validators/workers.py +++ b/validators/workers.py @@ -17,7 +17,6 @@ async def run_task(self): task_id = self.synapse.task_id bt.logging.trace(f"Worker {task_id} received task: {self.synapse}") - await self.dendrite.aclose_session() responses = await self.dendrite( axons=[self.axon], synapse=self.synapse, @@ -25,10 +24,13 @@ async def run_task(self): timeout=self.synapse.timeout, streaming=self.synapse.streaming, ) + full_resp = '' if self.synapse.streaming: async for chunk in responses[0]: if isinstance(chunk, str): redis_client.xadd(REDIS_RESULT_STREAM + f"{task_id}", {"chunk": chunk}) + full_resp += chunk else: redis_client.rpush(REDIS_RESULT, responses[0]) redis_client.close() + bt.logging.debug(f"worker sends this {full_resp}: {self.synapse.uid}") From 495eeb155c54b5c4f75080a5dd8c5b3188ef92d0 Mon Sep 17 00:00:00 2001 From: acer-king Date: Fri, 27 Sep 2024 07:37:17 -0700 Subject: [PATCH 50/80] update readme to install and start redis-server --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index dd17332b..298dd2cc 100644 --- a/README.md +++ b/README.md @@ -199,7 +199,8 @@ Go to the [Pixabay API docs](https://pixabay.com/api/docs/) and sign in with you Before starting make sure update your system and have pm2 installed to run the scripts in the background. ```bash -apt update -y && apt-get install git -y && apt install python3-pip -y +apt update -y && apt-get install git -y && apt install python3-pip -y && apt install redis-server + ``` Download the repository, navigate to the folder and then create virtual env and install the necessary requirements with the following chained command. From 2fcf6f198bcf7858e139cd823d730b73d1d6b8e2 Mon Sep 17 00:00:00 2001 From: acer-king Date: Fri, 27 Sep 2024 11:00:33 -0700 Subject: [PATCH 51/80] update reward a little bit and run scoring system in seperate thread --- cortext/reward.py | 13 +++++++----- validators/utils.py | 41 ++++++++++++++++++------------------- validators/weight_setter.py | 35 +++++++++++++++++++------------ 3 files changed, 50 insertions(+), 39 deletions(-) diff --git a/cortext/reward.py b/cortext/reward.py index 24bd52b8..0fe11f5f 100644 --- a/cortext/reward.py +++ b/cortext/reward.py @@ -18,8 +18,8 @@ # DEALINGS IN THE SOFTWARE. 
from __future__ import annotations from transformers import logging as hf_logging -hf_logging.set_verbosity_error() +hf_logging.set_verbosity_error() import re import io @@ -37,10 +37,13 @@ from sklearn.feature_extraction.text import TfidfVectorizer from transformers import CLIPProcessor, CLIPModel + # ==== TEXT ==== def calculate_text_similarity(text1: str, text2: str): try: + text1 = str(text1).lower() + text2 = str(text2).lower() # Initialize the TF-IDF Vectorizer vectorizer = TfidfVectorizer() @@ -56,6 +59,7 @@ def calculate_text_similarity(text1: str, text2: str): bt.logging.error(f"Error in calculate_text_similarity: {traceback.format_exc()}") raise + async def api_score(api_answer: str, response: str, weight: float, temperature: float, provider: str) -> float: try: if api_answer is None or response is None: @@ -153,6 +157,7 @@ def calculate_image_similarity(image, description, max_length: int = 77): # Calculate cosine similarity return torch.cosine_similarity(image_embedding, text_embedding, dim=1).item() + async def dalle_score(uid, url, desired_size, description, weight, similarity_threshold=0.21) -> float: """Calculate the image score based on similarity and size asynchronously.""" @@ -191,11 +196,11 @@ async def dalle_score(uid, url, desired_size, description, weight, similarity_th return 0 - # IMAGES ---- DETERMINISTIC async def deterministic_score(uid: int, syn, weight: float): - vali_b64s = await utils.call_stability(syn.messages, syn.seed, syn.steps, syn.cfg_scale, syn.width, syn.height, syn.samples, syn.sampler) + vali_b64s = await utils.call_stability(syn.messages, syn.seed, syn.steps, syn.cfg_scale, syn.width, syn.height, + syn.samples, syn.sampler) for miner_b64, vali_b64 in zip(syn.completion["b64s"], vali_b64s): if miner_b64[:50] != vali_b64[:50]: @@ -206,7 +211,6 @@ async def deterministic_score(uid: int, syn, weight: float): return weight - # ==== Embeddings ===== async def embeddings_score(openai_answer: list, response: list, weight: float, threshold: float = .95) -> float: @@ -214,7 +218,6 @@ async def embeddings_score(openai_answer: list, response: list, weight: float, t bt.logging.info("The number of embeddings in openai_answer and response do not match.") return 0 - # Calculate similarity for each pair of embeddings similarities = [] for oa_emb, resp_emb in zip(openai_answer, response): diff --git a/validators/utils.py b/validators/utils.py index 85d362dc..2432e356 100644 --- a/validators/utils.py +++ b/validators/utils.py @@ -14,7 +14,7 @@ import traceback from cortext import ImageResponse, ALL_SYNAPSE_TYPE, REDIS_RESULT_STREAM -from validators.services.cache import cache_service +from validators.services.cache import QueryResponseCache async def download_image(url): @@ -77,6 +77,8 @@ async def wrapper(*args, **kwargs): query_syn: ALL_SYNAPSE_TYPE = args[2] provider = query_syn.provider model = query_syn.model + + cache_service = QueryResponseCache() try: cache_service.set_cache(question=str(query_syn.json()), answer=str(answer), provider=provider, model=model) except Exception as err: @@ -132,7 +134,7 @@ async def get_stream_as_async_gen(task_id): bt.logging.trace(result_chunk) full_response += result_chunk yield result_chunk - result_entries = await get_result_entry_from_redis(redis_client, stream_name, last_id, max_try_cnt=50) + result_entries = await get_result_entry_from_redis(redis_client, stream_name, last_id, max_try_cnt=20) bt.logging.debug(f"stream exit. delete old stream from queue. 
{full_response}") redis_client.delete(stream_name) @@ -141,31 +143,28 @@ async def get_stream_as_async_gen(task_id): @error_handler async def get_stream_result(task_id): - redis_client = get_redis_client() last_id = '0' # Start reading from the beginning of the stream bt.logging.trace(f"Waiting for results of task {task_id}...") stream_name = REDIS_RESULT_STREAM + f"{task_id}" - full_response = "" - start_time = time.time() - while True: + redis_client = get_redis_client() + full_response = '' + result_entries = await get_result_entry_from_redis(redis_client, stream_name, last_id, max_try_cnt=50) + + while result_entries: # Read from the Redis stream - result_entries = redis_client.xread({stream_name: last_id}, block=5000) - result_entries = result_entries or [] - if result_entries: - for entry in result_entries: - stream_name, results = entry - for result_id, data in results: - result_chunk = data['chunk'] - last_id = result_id - bt.logging.trace(result_chunk) - full_response += result_chunk - else: - bt.logging.trace("No new results. stop generation.") - break - bt.logging.trace(f"stream exit. delete old stream from queue.") + for entry in result_entries: + stream_name, results = entry + for result_id, data in results: + result_chunk = data['chunk'] + last_id = result_id + bt.logging.trace(result_chunk) + full_response += result_chunk + result_entries = await get_result_entry_from_redis(redis_client, stream_name, last_id, max_try_cnt=20) + + bt.logging.debug(f"stream exit. delete old stream from queue.") redis_client.delete(stream_name) redis_client.close() - return full_response, time.time() - start_time + return full_response, 0 def find_positive_values(data: dict): diff --git a/validators/weight_setter.py b/validators/weight_setter.py index 74e60bb5..6e940aa9 100644 --- a/validators/weight_setter.py +++ b/validators/weight_setter.py @@ -10,6 +10,7 @@ from typing import Tuple, List import bittensor as bt from bittensor import StreamingSynapse +import threading import cortext from starlette.types import Send @@ -74,10 +75,20 @@ def __init__(self, config, cache: QueryResponseCache, loop=None): self.thread_executor = concurrent.futures.ThreadPoolExecutor(thread_name_prefix='asyncio') self.loop.create_task(self.consume_organic_queries()) self.loop.create_task(self.perform_synthetic_queries()) - self.loop.create_task(self.process_queries_from_database()) + # Start the thread with a new event loop + thread = threading.Thread(target=self.run_scoring_event_loop) + thread.start() + + # Function to run the event loop + def run_scoring_event_loop(self): + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) # Set the new event loop for this thread + loop.run_until_complete(self.process_queries_from_database()) + loop.close() async def run_sync_in_async(self, fn): - return await self.loop.run_in_executor(self.thread_executor, fn) + loop = asyncio.get_event_loop() + return await loop.run_in_executor(None, fn) async def refresh_metagraph(self): await self.run_sync_in_async(lambda: self.metagraph.sync()) @@ -211,13 +222,12 @@ async def update_weights(self): avg_scores = {} # Compute average scores per UID - async with self.lock: - for uid in self.total_scores: - count = self.score_counts[uid] - if count > 0: - avg_scores[uid] = self.total_scores[uid] / count - else: - avg_scores[uid] = 0.0 + for uid in self.total_scores: + count = self.score_counts[uid] + if count > 0: + avg_scores[uid] = self.total_scores[uid] / count + else: + avg_scores[uid] = 0.0 bt.logging.info(f"Average scores = 
{avg_scores}") @@ -432,11 +442,10 @@ async def process_queries_from_database(self): if not self.query_database or not self.synthetic_task_done: bt.logging.trace("no data in query_database. so continue...") continue + bt.logging.info(f"start scoring process for {len(self.query_database)} query_resps. {self.query_database}") - async with self.lock: - # Copy queries to process and clear the database - queries_to_process = self.query_database.copy() - self.query_database.clear() + queries_to_process = self.query_database.copy() + self.query_database.clear() self.synthetic_task_done = False From 40900036379179c851e0cd643c90d06322a3726d Mon Sep 17 00:00:00 2001 From: acer-king Date: Fri, 27 Sep 2024 11:14:18 -0700 Subject: [PATCH 52/80] run scoring module in main loop --- validators/weight_setter.py | 33 +++++++++++++-------------------- validators/workers.py | 1 - 2 files changed, 13 insertions(+), 21 deletions(-) diff --git a/validators/weight_setter.py b/validators/weight_setter.py index 6e940aa9..cf8c4061 100644 --- a/validators/weight_setter.py +++ b/validators/weight_setter.py @@ -75,20 +75,11 @@ def __init__(self, config, cache: QueryResponseCache, loop=None): self.thread_executor = concurrent.futures.ThreadPoolExecutor(thread_name_prefix='asyncio') self.loop.create_task(self.consume_organic_queries()) self.loop.create_task(self.perform_synthetic_queries()) - # Start the thread with a new event loop - thread = threading.Thread(target=self.run_scoring_event_loop) - thread.start() + self.loop.create_task(self.process_queries_from_database()) - # Function to run the event loop - def run_scoring_event_loop(self): - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) # Set the new event loop for this thread - loop.run_until_complete(self.process_queries_from_database()) - loop.close() async def run_sync_in_async(self, fn): - loop = asyncio.get_event_loop() - return await loop.run_in_executor(None, fn) + return await self.loop.run_in_executor(None, fn) async def refresh_metagraph(self): await self.run_sync_in_async(lambda: self.metagraph.sync()) @@ -222,12 +213,13 @@ async def update_weights(self): avg_scores = {} # Compute average scores per UID - for uid in self.total_scores: - count = self.score_counts[uid] - if count > 0: - avg_scores[uid] = self.total_scores[uid] / count - else: - avg_scores[uid] = 0.0 + async with self.lock: + for uid in self.total_scores: + count = self.score_counts[uid] + if count > 0: + avg_scores[uid] = self.total_scores[uid] / count + else: + avg_scores[uid] = 0.0 bt.logging.info(f"Average scores = {avg_scores}") @@ -442,10 +434,11 @@ async def process_queries_from_database(self): if not self.query_database or not self.synthetic_task_done: bt.logging.trace("no data in query_database. so continue...") continue - bt.logging.info(f"start scoring process for {len(self.query_database)} query_resps. 
{self.query_database}") + bt.logging.info(f"start scoring process...") - queries_to_process = self.query_database.copy() - self.query_database.clear() + async with self.lock: + queries_to_process = self.query_database.copy() + self.query_database.clear() self.synthetic_task_done = False diff --git a/validators/workers.py b/validators/workers.py index 556338a2..1befb830 100644 --- a/validators/workers.py +++ b/validators/workers.py @@ -33,4 +33,3 @@ async def run_task(self): else: redis_client.rpush(REDIS_RESULT, responses[0]) redis_client.close() - bt.logging.debug(f"worker sends this {full_resp}: {self.synapse.uid}") From e8e4b03e82f0f017c1a8ef5e8ba9a844e0ed1aec Mon Sep 17 00:00:00 2001 From: acer-king Date: Fri, 27 Sep 2024 11:22:36 -0700 Subject: [PATCH 53/80] increase duration of redis stream --- validators/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validators/utils.py b/validators/utils.py index 2432e356..746f6862 100644 --- a/validators/utils.py +++ b/validators/utils.py @@ -159,7 +159,7 @@ async def get_stream_result(task_id): last_id = result_id bt.logging.trace(result_chunk) full_response += result_chunk - result_entries = await get_result_entry_from_redis(redis_client, stream_name, last_id, max_try_cnt=20) + result_entries = await get_result_entry_from_redis(redis_client, stream_name, last_id, max_try_cnt=50) bt.logging.debug(f"stream exit. delete old stream from queue.") redis_client.delete(stream_name) From 74239794597f8af9fe96c4cce5aad78cadb5d8ed Mon Sep 17 00:00:00 2001 From: acer-king Date: Fri, 27 Sep 2024 12:11:16 -0700 Subject: [PATCH 54/80] update weights every epoch. --- validators/weight_setter.py | 41 +++++++++++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 8 deletions(-) diff --git a/validators/weight_setter.py b/validators/weight_setter.py index cf8c4061..fc9b64ed 100644 --- a/validators/weight_setter.py +++ b/validators/weight_setter.py @@ -10,7 +10,6 @@ from typing import Tuple, List import bittensor as bt from bittensor import StreamingSynapse -import threading import cortext from starlette.types import Send @@ -62,6 +61,10 @@ def __init__(self, config, cache: QueryResponseCache, loop=None): bt.logging.info(f"Axon server started on port {self.config.axon.port}") self.dendrite = config.dendrite + # Get network tempo + self.tempo = self.subtensor.tempo(self.netuid) + self.weights_rate_limit = self.node_query('SubtensorModule', 'WeightsSetRateLimit', [self.netuid]) + # Set up async-related attributes self.lock = asyncio.Lock() self.loop = loop or asyncio.get_event_loop() @@ -76,7 +79,7 @@ def __init__(self, config, cache: QueryResponseCache, loop=None): self.loop.create_task(self.consume_organic_queries()) self.loop.create_task(self.perform_synthetic_queries()) self.loop.create_task(self.process_queries_from_database()) - + self.loop.create_task(self.update_and_refresh()) async def run_sync_in_async(self, fn): return await self.loop.run_in_executor(None, fn) @@ -101,12 +104,35 @@ async def initialize_uids_and_capacities(self): self.task_mgr = TaskMgr(uid_to_capacities=self.uid_to_capacity, dendrite=self.dendrite, metagraph=self.metagraph, loop=self.loop) + def node_query(self, module, method, params): + try: + result = self.node.query(module, method, params).value + + except Exception as err: + # reinitilize node + self.node = SubstrateInterface(url=self.config.subtensor.chain_endpoint) + result = self.node.query(module, method, params).value + + return result + + def get_blocks_til_epoch(self, block): + return 
self.tempo - (block + 19) % (self.tempo + 1) + async def update_and_refresh(self): - await self.update_weights() - bt.logging.info("Refreshing metagraph...") - await self.refresh_metagraph() - await self.initialize_uids_and_capacities() - bt.logging.info("Metagraph refreshed.") + while True: + current_block = self.node_query('System', 'Number', []) + last_update = current_block - self.node_query('SubtensorModule', 'LastUpdate', [self.netuid])[self.my_uid] + bt.logging.info(f"last update: {last_update} blocks ago") + + if last_update >= self.tempo * 2 or ( + self.get_blocks_til_epoch(current_block) < 10 and last_update >= self.weights_rate_limit): + await self.update_weights() + bt.logging.info("Refreshing metagraph...") + await self.refresh_metagraph() + await self.initialize_uids_and_capacities() + bt.logging.info("Metagraph refreshed.") + + await asyncio.sleep(60) async def perform_synthetic_queries(self): while True: @@ -454,7 +480,6 @@ async def process_queries_from_database(self): self.total_scores[uid] += score self.score_counts[uid] += 1 bt.logging.info(f"current total score are {self.total_scores}") - await self.update_and_refresh() @property def batch_list_of_all_uids(self): From 4b633c514c48c28bbe6750fff4136c4d5890a301 Mon Sep 17 00:00:00 2001 From: acer-king Date: Fri, 27 Sep 2024 23:36:59 -0700 Subject: [PATCH 55/80] score all resps per epoch --- .../services/validators/base_validator.py | 5 --- .../services/validators/text_validator.py | 6 ++- validators/weight_setter.py | 41 ++++++++++--------- 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/validators/services/validators/base_validator.py b/validators/services/validators/base_validator.py index 1c49c284..eec3c3df 100644 --- a/validators/services/validators/base_validator.py +++ b/validators/services/validators/base_validator.py @@ -123,11 +123,6 @@ def get_uid_to_scores_dict(self, uid_to_query_resps, scored_responses: tuple[flo avg_score = sum(scores) / len(scores) uid_provider_model_scores_avg_dict[key] = avg_score - # total_weights = 0 - # for provider, model_infos in TEXT_VALI_MODELS_WEIGHTS.items(): - # for model in model_infos: - # total_weights += model_infos.get(model) - # apply weight for each model and calculate score based on weight of models. 
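# A minimal, runnable sketch of the weighting step below, kept outside the
# validator class: the per-(uid, provider, model) averages are multiplied by
# the model weight and the miner's advertised bandwidth, then summed per uid.
# The dict shapes are assumptions inferred from how this hunk uses them.
from collections import defaultdict

def weight_scores(avg_by_key: dict, model_weights: dict, capacity: dict) -> dict:
    """avg_by_key maps "uid::provider::model" to an average score."""
    uid_scores = defaultdict(float)
    for key, avg_score in avg_by_key.items():
        uid_str, provider, model = key.split("::")
        uid = int(uid_str)
        weight = model_weights.get(provider, {}).get(model, 0)
        bandwidth = capacity.get(uid, {}).get(provider, {}).get(model, 1)
        uid_scores[uid] += avg_score * weight * bandwidth
    return dict(uid_scores)

# Toy example: one miner serving two models; all numbers are made up.
print(weight_scores(
    {"5::OpenAI::gpt-4o": 0.9, "5::Groq::llama-3.1-70b-versatile": 0.8},
    {"OpenAI": {"gpt-4o": 1.0}, "Groq": {"llama-3.1-70b-versatile": 0.5}},
    {5: {"OpenAI": {"gpt-4o": 2}, "Groq": {"llama-3.1-70b-versatile": 1}}},
))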
uid_scores_dict = defaultdict(float) for key, avg_score in uid_provider_model_scores_avg_dict.items(): diff --git a/validators/services/validators/text_validator.py b/validators/services/validators/text_validator.py index a743c370..d29398a8 100644 --- a/validators/services/validators/text_validator.py +++ b/validators/services/validators/text_validator.py @@ -116,7 +116,9 @@ async def build_wandb_data(self, uid_to_score, responses): self.wandb_data["responses"][uid] = response return self.wandb_data - async def call_api(self, prompt: str, image_url: Optional[str], provider: str) -> str: + async def call_api(self, prompt: str, image_url: Optional[str], query_syn: StreamPrompting) -> str: + provider = query_syn.provider + self.model = query_syn.model if provider == "OpenAI": return await call_openai( [{"role": "user", "content": prompt, "image": image_url}], self.temperature, self.model, self.seed, @@ -161,7 +163,7 @@ async def call_api(self, prompt: str, image_url: Optional[str], provider: str) - async def get_answer_task(self, uid: int, query_syn: StreamPrompting, response): prompt = query_syn.messages[0].get("content") image_url = query_syn.messages[0].get("image") - answer = await self.call_api(prompt, image_url, self.provider) + answer = await self.call_api(prompt, image_url, query_syn) return answer async def get_scoring_task(self, uid, answer, response): diff --git a/validators/weight_setter.py b/validators/weight_setter.py index fc9b64ed..4024d5f2 100644 --- a/validators/weight_setter.py +++ b/validators/weight_setter.py @@ -79,7 +79,6 @@ def __init__(self, config, cache: QueryResponseCache, loop=None): self.loop.create_task(self.consume_organic_queries()) self.loop.create_task(self.perform_synthetic_queries()) self.loop.create_task(self.process_queries_from_database()) - self.loop.create_task(self.update_and_refresh()) async def run_sync_in_async(self, fn): return await self.loop.run_in_executor(None, fn) @@ -118,27 +117,28 @@ def node_query(self, module, method, params): def get_blocks_til_epoch(self, block): return self.tempo - (block + 19) % (self.tempo + 1) - async def update_and_refresh(self): - while True: - current_block = self.node_query('System', 'Number', []) - last_update = current_block - self.node_query('SubtensorModule', 'LastUpdate', [self.netuid])[self.my_uid] - bt.logging.info(f"last update: {last_update} blocks ago") - - if last_update >= self.tempo * 2 or ( - self.get_blocks_til_epoch(current_block) < 10 and last_update >= self.weights_rate_limit): - await self.update_weights() - bt.logging.info("Refreshing metagraph...") - await self.refresh_metagraph() - await self.initialize_uids_and_capacities() - bt.logging.info("Metagraph refreshed.") + def is_epoch_end(self): + current_block = self.node_query('System', 'Number', []) + last_update = current_block - self.node_query('SubtensorModule', 'LastUpdate', [self.netuid])[self.my_uid] + bt.logging.info(f"last update: {last_update} blocks ago") + if last_update >= self.tempo * 2 or ( + self.get_blocks_til_epoch(current_block) < 10 and last_update >= self.weights_rate_limit): + return True + return False - await asyncio.sleep(60) + async def update_and_refresh(self): + await self.update_weights() + bt.logging.info("Refreshing metagraph...") + await self.refresh_metagraph() + await self.initialize_uids_and_capacities() + bt.logging.info("Metagraph refreshed.") async def perform_synthetic_queries(self): while True: # wait for MIN_REQUEST_PERIOD minutes. 
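# A pure-function sketch of the is_epoch_end test defined above, with the
# substrate queries replaced by plain integer arguments so it runs anywhere.
# The +19 offset mirrors get_blocks_til_epoch and is carried over as-is.
def epoch_end(current_block: int, last_update: int, tempo: int,
              weights_rate_limit: int) -> bool:
    blocks_til_epoch = tempo - (current_block + 19) % (tempo + 1)
    overdue = last_update >= tempo * 2
    window_open = blocks_til_epoch < 10 and last_update >= weights_rate_limit
    return overdue or window_open

# With a 360-block tempo, weights last set 750 blocks ago are always refreshed.
assert epoch_end(current_block=1000, last_update=750, tempo=360,
                 weights_rate_limit=100)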
await asyncio.sleep(cortext.REQUEST_PERIOD * 60) - bt.logging.info(f"start processing synthetic queries at block {self.metagraph.block} at time {time.time()}") + current_block = self.node_query('System', 'Number', []) + bt.logging.info(f"start processing synthetic queries at block {current_block} at time {time.time()}") start_time = time.time() # check available bandwidth and send synthetic requests to all miners. query_tasks = [] @@ -188,13 +188,15 @@ async def perform_synthetic_queries(self): self.synthetic_task_done = True bt.logging.info( - f"synthetic queries has been processed successfully. total queries are {len(query_synapses)}") + f"synthetic queries has been processed successfully." + f"total queries are {len(query_synapses)}") def choose_validator_from_model(self, model): text_validator = ValidatorRegistryMeta.get_class('TextValidator')(config=self.config, metagraph=self.metagraph) # image_validator = ValidatorRegistryMeta.get_class('ImageValidator')(config=self.config, # metagraph=self.metagraph) if model != 'dall-e-3': + text_validator.model = model return text_validator # else: # return image_validator @@ -455,11 +457,11 @@ def get_scoring_tasks_from_query_responses(self, queries_to_process): async def process_queries_from_database(self): while True: await asyncio.sleep(1) # Adjust the sleep time as needed - # accumulate all query results for MIN_REQUEST_PERIOD - if not self.query_database or not self.synthetic_task_done: + if not self.query_database or not self.is_epoch_end(): bt.logging.trace("no data in query_database. so continue...") continue + bt.logging.info(f"start scoring process...") async with self.lock: @@ -480,6 +482,7 @@ async def process_queries_from_database(self): self.total_scores[uid] += score self.score_counts[uid] += 1 bt.logging.info(f"current total score are {self.total_scores}") + await self.update_and_refresh() @property def batch_list_of_all_uids(self): From 81ff90d5a023a46315a3fb911df3b28f26cf1da4 Mon Sep 17 00:00:00 2001 From: acer-king Date: Sat, 28 Sep 2024 01:42:13 -0700 Subject: [PATCH 56/80] remove redis and handle tasks using only async --- README.md | 2 +- requirements.txt | 3 +- .../services/validators/base_validator.py | 5 +- validators/task_manager.py | 7 +- validators/utils.py | 57 ------- validators/weight_setter.py | 142 ++++++++++++------ validators/workers.py | 35 ----- 7 files changed, 98 insertions(+), 153 deletions(-) delete mode 100644 validators/workers.py diff --git a/README.md b/README.md index 298dd2cc..180ada86 100644 --- a/README.md +++ b/README.md @@ -199,7 +199,7 @@ Go to the [Pixabay API docs](https://pixabay.com/api/docs/) and sign in with you Before starting make sure update your system and have pm2 installed to run the scripts in the background. 
```bash -apt update -y && apt-get install git -y && apt install python3-pip -y && apt install redis-server +apt update -y && apt-get install git -y && apt install python3-pip -y ``` diff --git a/requirements.txt b/requirements.txt index 5a2a7d3c..5e2a3f2d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,5 +15,4 @@ anthropic_bedrock pyOpenSSL==24.* google-generativeai groq==0.5.0 -aioboto3 -redis>=4.2.0 \ No newline at end of file +aioboto3 \ No newline at end of file diff --git a/validators/services/validators/base_validator.py b/validators/services/validators/base_validator.py index eec3c3df..d2965cfe 100644 --- a/validators/services/validators/base_validator.py +++ b/validators/services/validators/base_validator.py @@ -138,9 +138,10 @@ def get_uid_to_scores_dict(self, uid_to_query_resps, scored_responses: tuple[flo bt.logging.debug(f"no band_width found for this uid {uid}") band_width = 1 bt.logging.debug(f"bandwidth is {band_width}") - uid_scores_dict[uid] += avg_score * model_weight * band_width + weighted_score = avg_score * model_weight * band_width + uid_scores_dict[uid] += weighted_score bt.logging.debug(f"score {avg_score} for this model {model}, " - f"and weighted_score is {uid_scores_dict[uid]}") + f"and weighted_score is {weighted_score}") if not len(uid_scores_dict): validator_type = self.__class__.__name__ diff --git a/validators/task_manager.py b/validators/task_manager.py index 31a3cf66..f7eec21a 100644 --- a/validators/task_manager.py +++ b/validators/task_manager.py @@ -4,7 +4,6 @@ from cortext import ALL_SYNAPSE_TYPE from validators.utils import error_handler -from validators.workers import Worker from validators import utils @@ -46,11 +45,7 @@ def assign_task(self, synapse: ALL_SYNAPSE_TYPE): synapse.task_id = task_id bt.logging.trace(f"Assigning task {task_id} to miner {uid}") - - # Push task to the selected worker's task queue - worker = Worker(synapse=synapse, dendrite=self.dendrite, axon=self.get_axon_from_uid(uid=uid)) - self.loop.create_task(worker.run_task()) - return task_id + return uid def get_axon_from_uid(self, uid): uid = int(uid) diff --git a/validators/utils.py b/validators/utils.py index 746f6862..e92b21c5 100644 --- a/validators/utils.py +++ b/validators/utils.py @@ -115,58 +115,6 @@ async def get_result_entry_from_redis(redis_client, stream_name, last_id, max_tr return result_entries -@error_handler -async def get_stream_as_async_gen(task_id): - last_id = '0' # Start reading from the beginning of the stream - bt.logging.trace(f"Waiting for results of task {task_id}...") - stream_name = REDIS_RESULT_STREAM + f"{task_id}" - redis_client = get_redis_client() - full_response = '' - result_entries = await get_result_entry_from_redis(redis_client, stream_name, last_id, max_try_cnt=50) - - while result_entries: - # Read from the Redis stream - for entry in result_entries: - stream_name, results = entry - for result_id, data in results: - result_chunk = data['chunk'] - last_id = result_id - bt.logging.trace(result_chunk) - full_response += result_chunk - yield result_chunk - result_entries = await get_result_entry_from_redis(redis_client, stream_name, last_id, max_try_cnt=20) - - bt.logging.debug(f"stream exit. delete old stream from queue. 
{full_response}") - redis_client.delete(stream_name) - redis_client.close() - - -@error_handler -async def get_stream_result(task_id): - last_id = '0' # Start reading from the beginning of the stream - bt.logging.trace(f"Waiting for results of task {task_id}...") - stream_name = REDIS_RESULT_STREAM + f"{task_id}" - redis_client = get_redis_client() - full_response = '' - result_entries = await get_result_entry_from_redis(redis_client, stream_name, last_id, max_try_cnt=50) - - while result_entries: - # Read from the Redis stream - for entry in result_entries: - stream_name, results = entry - for result_id, data in results: - result_chunk = data['chunk'] - last_id = result_id - bt.logging.trace(result_chunk) - full_response += result_chunk - result_entries = await get_result_entry_from_redis(redis_client, stream_name, last_id, max_try_cnt=50) - - bt.logging.debug(f"stream exit. delete old stream from queue.") - redis_client.delete(stream_name) - redis_client.close() - return full_response, 0 - - def find_positive_values(data: dict): positive_values = {} @@ -181,8 +129,3 @@ def find_positive_values(data: dict): positive_values[key] = value return positive_values - - -def get_redis_client(): - redis_client = redis.from_url("redis://localhost", encoding="utf-8", decode_responses=True) - return redis_client diff --git a/validators/weight_setter.py b/validators/weight_setter.py index 4024d5f2..b40ed12b 100644 --- a/validators/weight_setter.py +++ b/validators/weight_setter.py @@ -18,9 +18,8 @@ from cortext.metaclasses import ValidatorRegistryMeta from validators.services import CapacityService, BaseValidator, TextValidator, ImageValidator from validators.services.cache import QueryResponseCache -from validators.utils import error_handler, get_stream_as_async_gen, get_stream_result +from validators.utils import error_handler from validators.task_manager import TaskMgr -from validators.models.enum import QueryType scoring_organic_timeout = 60 @@ -133,6 +132,50 @@ async def update_and_refresh(self): await self.initialize_uids_and_capacities() bt.logging.info("Metagraph refreshed.") + async def query_miner(self, uid, query_syn: cortext.ALL_SYNAPSE_TYPE): + if query_syn.streaming: + uid = self.task_mgr.assign_task(query_syn) + if uid is None: + bt.logging.error("Can't create task.") + return + bt.logging.trace(f"synthetic task is created and uid is {uid}") + + async def handle_response(responses): + start_time = time.time() + response_text = '' + for resp in responses: + async for chunk in resp: + if isinstance(chunk, str): + response_text += chunk + bt.logging.trace(f"Streamed text: {chunk}") + bt.logging.debug(f"{len(response_text)} for uid {uid}") + + + # Store the query and response in the shared database + async with self.lock: + self.query_database.append({ + 'uid': uid, + 'synapse': query_syn, + 'response': (response_text, time.time() - start_time), + 'query_type': 'organic', + 'timestamp': asyncio.get_event_loop().time(), + 'validator': ValidatorRegistryMeta.get_class('TextValidator')(config=self.config, + metagraph=self.metagraph) + }) + + + axon = self.metagraph.axons[uid] + responses = await self.dendrite( + axons=[axon], + synapse=query_syn, + deserialize=False, + timeout=query_syn.timeout, + streaming=True, + ) + await handle_response(responses) + else: + pass + async def perform_synthetic_queries(self): while True: # wait for MIN_REQUEST_PERIOD minutes. 
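# A self-contained sketch of the chunk-accumulation pattern handle_response
# uses above: each response behaves as an async generator of string chunks,
# and non-string items (such as a trailing synapse object) are skipped.
import asyncio

async def collect_chunks(responses) -> str:
    text = ''
    for resp in responses:
        async for chunk in resp:
            if isinstance(chunk, str):
                text += chunk
    return text

async def fake_stream():
    # stands in for one dendrite streaming response
    for chunk in ("Hello", ", ", "world"):
        yield chunk
    yield object()  # non-string sentinel, ignored like a trailing synapse

print(asyncio.run(collect_chunks([fake_stream()])))  # Hello, world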
@@ -157,34 +200,19 @@ async def perform_synthetic_queries(self): bt.logging.debug(f"No query tasks for synthetic.") continue # don't process any organic query while processing synthetic queries. - synthetic_task_ids = [] + synthetic_tasks = [] async with self.lock: query_synapses = await asyncio.gather(*query_tasks) for query_syn in query_synapses: - task_id = self.task_mgr.assign_task(query_syn) - synthetic_task_ids.append(task_id) + uid = self.task_mgr.assign_task(query_syn) + synthetic_tasks.append(self.query_miner(uid, query_syn)) + + bt.logging.debug(f"{time.time() - start_time} elapsed for creating and submitting synthetic queries.") # restore capacities immediately after synthetic query consuming all bandwidth. self.task_mgr.restore_capacities_for_all_miners() - bt.logging.debug(f"{time.time() - start_time} elapsed for creating and submitting synthetic queries.") - - # get result from all synthetic tasks - synthetic_result_tasks = [] - for task_id in synthetic_task_ids: - task = get_stream_result(task_id=task_id) - synthetic_result_tasks.append(task) - - synthetic_results = await asyncio.gather(*synthetic_result_tasks) - for synapse, (result, time_process) in zip(query_synapses, synthetic_results): - self.query_database.append({ - 'uid': synapse.uid, - 'synapse': synapse, - 'response': (result, time_process), - 'query_type': QueryType.synthetic_type, - 'timestamp': asyncio.get_event_loop().time(), - 'validator': self.choose_validator_from_model(synapse.model) - }) + await asyncio.gather(*synthetic_tasks) self.synthetic_task_done = True bt.logging.info( @@ -362,42 +390,56 @@ async def prompt(self, synapse: StreamPrompting) -> StreamingSynapse.BTStreaming # Return the streaming response async def _prompt(query_synapse: StreamPrompting, send: Send): bt.logging.info(f"Sending {synapse} request to uid: {synapse.uid}") - start_time = time.time() synapse.deserialize_flag = False synapse.streaming = True - task_id = self.task_mgr.assign_task(query_synapse) - if task_id is None: + uid = self.task_mgr.assign_task(query_synapse) + if uid is None: bt.logging.error("Can't create task.") await send({"type": "http.response.body", "body": b'', "more_body": False}) return - bt.logging.trace(f"task is created and task_id is {task_id}") - - response_text = '' - async for chunk in get_stream_as_async_gen(task_id): - if isinstance(chunk, str): - await send({ - "type": "http.response.body", - "body": chunk.encode("utf-8"), - "more_body": True, + bt.logging.trace(f"task is created and uid is {uid}") + + async def handle_response(responses): + start_time = time.time() + response_text = '' + for resp in responses: + async for chunk in resp: + if isinstance(chunk, str): + await send({ + "type": "http.response.body", + "body": chunk.encode("utf-8"), + "more_body": True, + }) + response_text += chunk + bt.logging.trace(f"Streamed text: {chunk}") + + # Store the query and response in the shared database + async with self.lock: + self.query_database.append({ + 'uid': synapse.uid, + 'synapse': synapse, + 'response': (response_text, time.time() - start_time), + 'query_type': 'organic', + 'timestamp': asyncio.get_event_loop().time(), + 'validator': ValidatorRegistryMeta.get_class('TextValidator')(config=self.config, + metagraph=self.metagraph) }) - response_text += chunk - bt.logging.trace(f"received total response is :{response_text}") - await send({"type": "http.response.body", "body": b'', "more_body": False}) + await send({"type": "http.response.body", "body": b'', "more_body": False}) + + + axon = 
self.metagraph.axons[synapse.uid] + responses = await self.dendrite( + axons=[axon], + synapse=synapse, + deserialize=False, + timeout=synapse.timeout, + streaming=True, + ) + return await handle_response(responses) - # Store the query and response in the shared database - async with self.lock: - self.query_database.append({ - 'uid': synapse.uid, - 'synapse': synapse, - 'response': (response_text, time.time() - start_time), - 'query_type': 'organic', - 'timestamp': asyncio.get_event_loop().time(), - 'validator': ValidatorRegistryMeta.get_class('TextValidator')(config=self.config, - metagraph=self.metagraph) - }) token_streamer = partial(_prompt, synapse) return synapse.create_streaming_response(token_streamer) @@ -458,7 +500,7 @@ async def process_queries_from_database(self): while True: await asyncio.sleep(1) # Adjust the sleep time as needed # accumulate all query results for MIN_REQUEST_PERIOD - if not self.query_database or not self.is_epoch_end(): + if not self.synthetic_task_done or not self.is_epoch_end(): bt.logging.trace("no data in query_database. so continue...") continue diff --git a/validators/workers.py b/validators/workers.py deleted file mode 100644 index 1befb830..00000000 --- a/validators/workers.py +++ /dev/null @@ -1,35 +0,0 @@ -import bittensor as bt -from cortext import REDIS_RESULT_STREAM, REDIS_RESULT -from validators.utils import get_redis_client, error_handler - - -class Worker: - - def __init__(self, synapse, dendrite, axon): - self.synapse = synapse - self.dendrite = dendrite - self.axon = axon - - @error_handler - async def run_task(self): - # Pull task from worker-specific queue - redis_client = get_redis_client() - task_id = self.synapse.task_id - bt.logging.trace(f"Worker {task_id} received task: {self.synapse}") - - responses = await self.dendrite( - axons=[self.axon], - synapse=self.synapse, - deserialize=self.synapse.deserialize_flag, - timeout=self.synapse.timeout, - streaming=self.synapse.streaming, - ) - full_resp = '' - if self.synapse.streaming: - async for chunk in responses[0]: - if isinstance(chunk, str): - redis_client.xadd(REDIS_RESULT_STREAM + f"{task_id}", {"chunk": chunk}) - full_resp += chunk - else: - redis_client.rpush(REDIS_RESULT, responses[0]) - redis_client.close() From a0b41f24dddae25c7bdad26349f2aa23f374de2f Mon Sep 17 00:00:00 2001 From: acer-king Date: Sat, 28 Sep 2024 03:02:50 -0700 Subject: [PATCH 57/80] fix issues of task_manager --- validators/task_manager.py | 15 ++++++--------- validators/weight_setter.py | 6 +++--- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/validators/task_manager.py b/validators/task_manager.py index f7eec21a..352121b9 100644 --- a/validators/task_manager.py +++ b/validators/task_manager.py @@ -17,8 +17,8 @@ def __init__(self, uid_to_capacities, dendrite, metagraph, loop): self.loop = loop def restore_capacities_for_all_miners(self): - bt.logging.debug(f"resource is restored. self.remain_resources = {self.remain_resources}") self.remain_resources = deepcopy(self.uid_to_capacity) + bt.logging.debug(f"resource is restored. 
remain_resources = {self.remain_resources}") def update_remain_capacity_based_on_new_capacity(self, new_uid_to_capacity): for uid, capacity in new_uid_to_capacity.items(): @@ -31,6 +31,7 @@ def update_remain_capacity_based_on_new_capacity(self, new_uid_to_capacity): if diff: bt.logging.debug(f"diff {diff} found in {uid}, {provider}, {model}") self.remain_resources[uid][provider][model] -= diff + bt.logging.debug(f"remain_resources after epoch = {self.remain_resources}") @error_handler def assign_task(self, synapse: ALL_SYNAPSE_TYPE): @@ -39,12 +40,7 @@ def assign_task(self, synapse: ALL_SYNAPSE_TYPE): if uid is None: bt.logging.debug(f"no available resources to process this request.") return None - - synapse.uid = uid - task_id = utils.create_hash_value((synapse.json())) - synapse.task_id = task_id - - bt.logging.trace(f"Assigning task {task_id} to miner {uid}") + bt.logging.trace(f"Assigning task to miner {uid}") return uid def get_axon_from_uid(self, uid): @@ -54,9 +50,10 @@ def get_axon_from_uid(self, uid): def choose_miner(self, synapse: ALL_SYNAPSE_TYPE): provider = synapse.provider model = synapse.model - for uid, capacity in self.remain_resources.items(): + for uid in self.remain_resources: + capacity = self.remain_resources.get(uid) bandwidth = capacity.get(provider).get(model) if bandwidth is not None and bandwidth > 0: # decrease resource by one after choosing this miner for the request. - capacity[provider][model] -= 1 + self.remain_resources[uid][provider][model] -= 1 return uid diff --git a/validators/weight_setter.py b/validators/weight_setter.py index b40ed12b..8c6a1c30 100644 --- a/validators/weight_setter.py +++ b/validators/weight_setter.py @@ -134,7 +134,6 @@ async def update_and_refresh(self): async def query_miner(self, uid, query_syn: cortext.ALL_SYNAPSE_TYPE): if query_syn.streaming: - uid = self.task_mgr.assign_task(query_syn) if uid is None: bt.logging.error("Can't create task.") return @@ -212,6 +211,7 @@ async def perform_synthetic_queries(self): # restore capacities immediately after synthetic query consuming all bandwidth. 
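# A minimal sketch of the capacity bookkeeping this comment refers to: each
# assignment takes one slot from the remaining bandwidth of a
# (uid, provider, model) triple, and a deepcopy of the advertised capacity
# restores the whole pool for the next cycle. Toy numbers only.
from copy import deepcopy

advertised = {5: {"OpenAI": {"gpt-4o": 2}}}
remaining = deepcopy(advertised)

def take_slot(uid: int, provider: str, model: str) -> bool:
    left = remaining.get(uid, {}).get(provider, {}).get(model, 0)
    if left <= 0:
        return False
    remaining[uid][provider][model] = left - 1
    return True

assert take_slot(5, "OpenAI", "gpt-4o") and take_slot(5, "OpenAI", "gpt-4o")
assert not take_slot(5, "OpenAI", "gpt-4o")  # bandwidth exhausted
remaining = deepcopy(advertised)             # restore for the new cycle
assert take_slot(5, "OpenAI", "gpt-4o")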
self.task_mgr.restore_capacities_for_all_miners() + await self.dendrite.aclose_session() await asyncio.gather(*synthetic_tasks) self.synthetic_task_done = True @@ -393,7 +393,6 @@ async def _prompt(query_synapse: StreamPrompting, send: Send): synapse.deserialize_flag = False synapse.streaming = True - uid = self.task_mgr.assign_task(query_synapse) if uid is None: bt.logging.error("Can't create task.") @@ -430,7 +429,8 @@ async def handle_response(responses): await send({"type": "http.response.body", "body": b'', "more_body": False}) - axon = self.metagraph.axons[synapse.uid] + axon = self.metagraph.axons[uid] + await self.dendrite.aclose_session() responses = await self.dendrite( axons=[axon], synapse=synapse, From 17933e4ef7d83e0dc89fc5756e56cdf880f9b724 Mon Sep 17 00:00:00 2001 From: acer-king Date: Sat, 28 Sep 2024 05:32:49 -0700 Subject: [PATCH 58/80] add cache based query and answer for scoring --- validators/services/cache.py | 13 ++++++-- .../services/validators/image_validator.py | 4 +-- .../services/validators/text_validator.py | 5 +-- validators/task_manager.py | 8 +++-- validators/utils.py | 29 +++++++++++++--- validators/weight_setter.py | 33 +++++++++---------- 6 files changed, 63 insertions(+), 29 deletions(-) diff --git a/validators/services/cache.py b/validators/services/cache.py index 25017405..d153b551 100644 --- a/validators/services/cache.py +++ b/validators/services/cache.py @@ -40,11 +40,20 @@ def set_cache(self, question, answer, provider, model, ttl=3600*24): ''', (p_key, question, answer, provider, model, expires_at)) self.conn.commit() + def get_answer(self, question, provider, model): + p_key = self.generate_hash(str(question) + str(provider) + str(model)) + cursor = self.conn.cursor() + cursor.execute(''' + SELECT answer FROM cache WHERE p_key = ? + ''', (p_key,)) + result = cursor.fetchone() + return result[0] if result else None + def get_cache(self, key): cursor = self.conn.cursor() cursor.execute(''' - SELECT value FROM cache WHERE p_key = ? AND timestamp > ? - ''', (key, time.time())) + SELECT * FROM cache WHERE p_key = ? 
+ ''', key) result = cursor.fetchone() return result[0] if result else None diff --git a/validators/services/validators/image_validator.py b/validators/services/validators/image_validator.py index f20942d0..27975ee3 100644 --- a/validators/services/validators/image_validator.py +++ b/validators/services/validators/image_validator.py @@ -5,7 +5,7 @@ import cortext.reward from cortext.protocol import ImageResponse from validators import utils -from validators.utils import error_handler, save_answer_to_cache +from validators.utils import error_handler, save_or_get_answer_from_cache from cortext.utils import get_question import bittensor as bt @@ -82,7 +82,7 @@ async def get_scoring_task(self, uid, answer, response: ImageResponse): score = 0 # cortext.reward.deterministic_score(uid, syn, self.weight) return score - @save_answer_to_cache + @save_or_get_answer_from_cache async def get_answer_task(self, uid, synapse: ImageResponse, response): return synapse diff --git a/validators/services/validators/text_validator.py b/validators/services/validators/text_validator.py index d29398a8..cffe1705 100644 --- a/validators/services/validators/text_validator.py +++ b/validators/services/validators/text_validator.py @@ -11,7 +11,7 @@ from cortext.protocol import StreamPrompting from cortext.utils import (call_anthropic_bedrock, call_bedrock, call_anthropic, call_gemini, call_groq, call_openai, get_question) -from validators.utils import save_answer_to_cache +from validators.utils import save_or_get_answer_from_cache, get_query_synapse_from_cache class TextValidator(BaseValidator): @@ -74,6 +74,7 @@ async def get_question(self, miner_cnt=1): question = await get_question("text", miner_cnt, is_vision_model) return question + @get_query_synapse_from_cache async def create_query(self, uid, provider=None, model=None) -> bt.Synapse: question = await self.get_question() prompt = question.get("prompt") @@ -159,7 +160,7 @@ async def call_api(self, prompt: str, image_url: Optional[str], query_syn: Strea else: bt.logging.error(f"provider {provider} not found") - @save_answer_to_cache + @save_or_get_answer_from_cache async def get_answer_task(self, uid: int, query_syn: StreamPrompting, response): prompt = query_syn.messages[0].get("content") image_url = query_syn.messages[0].get("image") diff --git a/validators/task_manager.py b/validators/task_manager.py index 352121b9..23cf5cb0 100644 --- a/validators/task_manager.py +++ b/validators/task_manager.py @@ -1,4 +1,5 @@ import asyncio +import random from copy import deepcopy import bittensor as bt @@ -50,10 +51,13 @@ def get_axon_from_uid(self, uid): def choose_miner(self, synapse: ALL_SYNAPSE_TYPE): provider = synapse.provider model = synapse.model + available_uids = [] for uid in self.remain_resources: capacity = self.remain_resources.get(uid) bandwidth = capacity.get(provider).get(model) if bandwidth is not None and bandwidth > 0: # decrease resource by one after choosing this miner for the request. 
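# A sketch of the selection change made in this hunk: rather than always
# picking the first miner with spare bandwidth, gather every miner that still
# has a free slot and choose one at random, spreading load across equally
# capable miners. Note the empty-list case is handled here; the patched code
# would raise IndexError from random.choice if no miner has capacity left.
import random

def choose_uid(remaining: dict, provider: str, model: str):
    available = [uid for uid, caps in remaining.items()
                 if caps.get(provider, {}).get(model, 0) > 0]
    if not available:
        return None
    uid = random.choice(available)
    remaining[uid][provider][model] -= 1
    return uid

caps = {1: {"OpenAI": {"gpt-4o": 1}}, 2: {"OpenAI": {"gpt-4o": 1}}}
print(choose_uid(caps, "OpenAI", "gpt-4o"))  # prints 1 or 2, at random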
- self.remain_resources[uid][provider][model] -= 1 - return uid + available_uids.append(uid) + uid = random.choice(available_uids) + self.remain_resources[uid][provider][model] -= 1 + return uid diff --git a/validators/utils.py b/validators/utils.py index e92b21c5..2568fc49 100644 --- a/validators/utils.py +++ b/validators/utils.py @@ -1,8 +1,6 @@ import random -import time import aiohttp import asyncio -import redis import base64 import hashlib import inspect @@ -70,15 +68,19 @@ async def handle_response_stream(responses) -> tuple[str, str]: return full_response -def save_answer_to_cache(func): +def save_or_get_answer_from_cache(func): @wraps(func) async def wrapper(*args, **kwargs): - answer = await func(*args, **kwargs) query_syn: ALL_SYNAPSE_TYPE = args[2] provider = query_syn.provider model = query_syn.model cache_service = QueryResponseCache() + answer = cache_service.get_answer(question=str(query_syn.json()), provider=provider, model=model) + if answer: + return answer + + answer = await func(*args, **kwargs) try: cache_service.set_cache(question=str(query_syn.json()), answer=str(answer), provider=provider, model=model) except Exception as err: @@ -91,6 +93,25 @@ async def wrapper(*args, **kwargs): return wrapper +def get_query_synapse_from_cache(func): + @wraps(func) + async def wrapper(*args, **kwargs): + cache_service = QueryResponseCache() + vali = args[0] + provider = args[2] + model = args[3] + questions_answers = cache_service.get_all_question_to_answers(provider=provider, model=model) + if not questions_answers or random.random() > 0.5: + query_syn = await func(*args, **kwargs) + return query_syn + # select one of questions_answers + query, answer = random.choice(questions_answers) + query_syn = vali.get_synapse_from_json(query) + return query_syn + + return wrapper + + def create_hash_value(input_string): # Create a SHA-256 hash object based on random and synpase input_string = str(input_string) + str(random.Random().random()) diff --git a/validators/weight_setter.py b/validators/weight_setter.py index 8c6a1c30..8dd0ce4c 100644 --- a/validators/weight_setter.py +++ b/validators/weight_setter.py @@ -174,6 +174,20 @@ async def handle_response(responses): await handle_response(responses) else: pass + async def create_query_syns_for_remaining_bandwidth(self): + query_tasks = [] + for uid, provider_to_cap in self.task_mgr.remain_resources.items(): + for provider, model_to_cap in provider_to_cap.items(): + for model, bandwidth in model_to_cap.items(): + if bandwidth > 0: + # create task and send remaining requests to the miner + vali = self.choose_validator_from_model(model) + query_task = vali.create_query(uid, provider, model) + query_tasks += [query_task] * bandwidth + else: + continue + query_synapses = await asyncio.gather(*query_tasks) + return query_synapses async def perform_synthetic_queries(self): while True: @@ -182,26 +196,11 @@ async def perform_synthetic_queries(self): current_block = self.node_query('System', 'Number', []) bt.logging.info(f"start processing synthetic queries at block {current_block} at time {time.time()}") start_time = time.time() - # check available bandwidth and send synthetic requests to all miners. 
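# A sketch of the fan-out step this comment describes, mirroring
# create_query_syns_for_remaining_bandwidth above: emit one query per unit of
# remaining bandwidth for every (uid, provider, model) triple.
def plan_queries(remaining: dict) -> list:
    plan = []
    for uid, providers in remaining.items():
        for provider, models in providers.items():
            for model, bandwidth in models.items():
                plan.extend([(uid, provider, model)] * max(bandwidth, 0))
    return plan

caps = {1: {"OpenAI": {"gpt-4o": 2}}, 2: {"Groq": {"llama-3.1-70b-versatile": 1}}}
print(plan_queries(caps))
# [(1, 'OpenAI', 'gpt-4o'), (1, 'OpenAI', 'gpt-4o'),
#  (2, 'Groq', 'llama-3.1-70b-versatile')]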
- query_tasks = [] - for uid, provider_to_cap in self.task_mgr.remain_resources.items(): - for provider, model_to_cap in provider_to_cap.items(): - for model, bandwidth in model_to_cap.items(): - if bandwidth > 0: - # create task and send remaining requests to the miner - vali = self.choose_validator_from_model(model) - query_task = vali.create_query(uid, provider, model) - query_tasks += [query_task] * bandwidth - else: - continue - - if not query_tasks: - bt.logging.debug(f"No query tasks for synthetic.") - continue # don't process any organic query while processing synthetic queries. synthetic_tasks = [] async with self.lock: - query_synapses = await asyncio.gather(*query_tasks) + # check available bandwidth and send synthetic requests to all miners. + query_synapses = await self.create_query_syns_for_remaining_bandwidth() for query_syn in query_synapses: uid = self.task_mgr.assign_task(query_syn) synthetic_tasks.append(self.query_miner(uid, query_syn)) From 71b21f0045143a6c58f10ba976ecb545dd060753 Mon Sep 17 00:00:00 2001 From: acer-king Date: Sat, 28 Sep 2024 06:07:59 -0700 Subject: [PATCH 59/80] disable cache based query for now --- validators/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validators/utils.py b/validators/utils.py index 2568fc49..74a4be3c 100644 --- a/validators/utils.py +++ b/validators/utils.py @@ -101,7 +101,7 @@ async def wrapper(*args, **kwargs): provider = args[2] model = args[3] questions_answers = cache_service.get_all_question_to_answers(provider=provider, model=model) - if not questions_answers or random.random() > 0.5: + if not questions_answers or random.random() > 0: query_syn = await func(*args, **kwargs) return query_syn # select one of questions_answers From 6998e50a046c1c31f1f7edf9b3e93f7ad2916786 Mon Sep 17 00:00:00 2001 From: acer-king Date: Sat, 28 Sep 2024 11:37:53 -0700 Subject: [PATCH 60/80] fixed issue for new miners and clean some loggings --- cortext/reward.py | 1 - cortext/utils.py | 18 +++++++++--------- .../services/validators/base_validator.py | 9 ++++++--- validators/task_manager.py | 9 ++++++++- validators/utils.py | 18 +++++++++++++++++- validators/weight_setter.py | 8 +------- 6 files changed, 41 insertions(+), 22 deletions(-) diff --git a/cortext/reward.py b/cortext/reward.py index 0fe11f5f..1469a592 100644 --- a/cortext/reward.py +++ b/cortext/reward.py @@ -53,7 +53,6 @@ def calculate_text_similarity(text1: str, text2: str): # Calculate the Cosine Similarity similarity = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0] - bt.logging.debug(f"Similarity: {similarity}") return similarity except Exception as e: bt.logging.error(f"Error in calculate_text_similarity: {traceback.format_exc()}") diff --git a/cortext/utils.py b/cortext/utils.py index 2e803125..f97a3119 100644 --- a/cortext/utils.py +++ b/cortext/utils.py @@ -310,7 +310,7 @@ async def get_item_from_list(items, vision): async with list_update_lock: items = state[category][item_type] - bt.logging.debug(f"Queue for {list_type}: {len(items) if items else 0} items") + bt.logging.trace(f"Queue for {list_type}: {len(items) if items else 0} items") item = await get_item_from_list(items, vision) @@ -319,7 +319,7 @@ async def get_item_from_list(items, vision): items = await get_items(category, item_type, theme) bt.logging.trace(f"Items generated: {items}") state[category][item_type] = items - bt.logging.debug(f"Fetched new list for {list_type}, containing {len(items)} items") + bt.logging.trace(f"Fetched new list for {list_type}, containing 
{len(items)} items") item = await get_item_from_list(items, vision) @@ -457,7 +457,7 @@ def extract_python_list(text: str): async def call_openai(messages, temperature, model, seed=1234, max_tokens=2048, top_p=1): for _ in range(2): - bt.logging.debug( + bt.logging.trace( f"Calling Openai to get answer. Temperature = {temperature}, Model = {model}, Seed = {seed}, Messages = {messages}" ) try: @@ -503,7 +503,7 @@ async def call_openai(messages, temperature, model, seed=1234, max_tokens=2048, async def call_gemini(messages, temperature, model, max_tokens, top_p, top_k): - bt.logging.debug(f"Calling Gemini. Temperature = {temperature}, Model = {model}, Messages = {messages}") + bt.logging.trace(f"Calling Gemini. Temperature = {temperature}, Model = {model}, Messages = {messages}") try: model = genai.GenerativeModel(model) response = model.generate_content( @@ -554,7 +554,7 @@ async def call_gemini(messages, temperature, model, max_tokens, top_p, top_k): async def call_anthropic_bedrock(prompt, temperature, model, max_tokens=2048, top_p=1, top_k=10000): try: - bt.logging.debug( + bt.logging.trace( f"Calling Bedrock via Anthropic. Model = {model}, Prompt = {prompt}, Temperature = {temperature}, Max Tokens = {max_tokens}" ) completion = await anthropic_bedrock_client.completions.create( @@ -610,7 +610,7 @@ async def generate_messages_to_claude(messages): async def call_anthropic(messages, temperature, model, max_tokens, top_p, top_k): try: - bt.logging.info( + bt.logging.trace( f"calling Anthropic for {messages} with temperature: {temperature}, model: {model}, max_tokens: {max_tokens}, top_p: {top_p}, top_k: {top_k}" ) filtered_messages, system_prompt = await generate_messages_to_claude(messages) @@ -633,7 +633,7 @@ async def call_anthropic(messages, temperature, model, max_tokens, top_p, top_k) async def call_groq(messages, temperature, model, max_tokens, top_p, seed): try: - bt.logging.info( + bt.logging.trace( f"calling groq for {messages} with temperature: {temperature}, model: {model}, max_tokens: {max_tokens}, top_p: {top_p}" ) @@ -655,7 +655,7 @@ async def call_groq(messages, temperature, model, max_tokens, top_p, seed): async def call_bedrock(messages, temperature, model, max_tokens, top_p, seed): try: - bt.logging.info( + bt.logging.trace( f"calling AWS Bedrock for {messages} with temperature: {temperature}, model: {model}, max_tokens: {max_tokens}, top_p: {top_p}" ) @@ -746,7 +746,7 @@ async def extract_message(message): async def call_stability(prompt, seed, steps, cfg_scale, width, height, samples, sampler): # bt.logging.info(f"calling stability for {prompt, seed, steps, cfg_scale, width, height, samples, sampler}") - bt.logging.info(f"calling stability for {prompt[:50]}...") + bt.logging.trace(f"calling stability for {prompt[:50]}...") # Run the synchronous stability_api.generate function in a separate thread meta = await asyncio.to_thread( diff --git a/validators/services/validators/base_validator.py b/validators/services/validators/base_validator.py index d2965cfe..89ec58a2 100644 --- a/validators/services/validators/base_validator.py +++ b/validators/services/validators/base_validator.py @@ -125,6 +125,7 @@ def get_uid_to_scores_dict(self, uid_to_query_resps, scored_responses: tuple[flo # apply weight for each model and calculate score based on weight of models. 
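# A small sketch of the defensive nested lookup this loop depends on: chained
# .get() calls with empty-dict defaults, so a miner that never advertised a
# provider/model pair contributes bandwidth 0 instead of raising AttributeError.
def lookup_bandwidth(data: dict, uid: int, provider: str, model: str):
    return data.get(uid, {}).get(provider, {}).get(model, 0)

caps = {5: {"OpenAI": {"gpt-4o": 2}}}
assert lookup_bandwidth(caps, 5, "OpenAI", "gpt-4o") == 2
assert lookup_bandwidth(caps, 7, "OpenAI", "gpt-4o") == 0  # unknown miner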
uid_scores_dict = defaultdict(float) + uid_model_to_scores_dict = defaultdict(dict) for key, avg_score in uid_provider_model_scores_avg_dict.items(): uid = int(str(key).split("::")[0]) provider = str(key).split("::")[1] @@ -137,11 +138,13 @@ def get_uid_to_scores_dict(self, uid_to_query_resps, scored_responses: tuple[flo if band_width is None: bt.logging.debug(f"no band_width found for this uid {uid}") band_width = 1 - bt.logging.debug(f"bandwidth is {band_width}") weighted_score = avg_score * model_weight * band_width uid_scores_dict[uid] += weighted_score - bt.logging.debug(f"score {avg_score} for this model {model}, " - f"and weighted_score is {weighted_score}") + uid_model_to_scores_dict[uid][model] = weighted_score + bt.logging.debug(f""" + score details for all miners: + {uid_model_to_scores_dict} + """) if not len(uid_scores_dict): validator_type = self.__class__.__name__ diff --git a/validators/task_manager.py b/validators/task_manager.py index 23cf5cb0..705c824f 100644 --- a/validators/task_manager.py +++ b/validators/task_manager.py @@ -21,12 +21,19 @@ def restore_capacities_for_all_miners(self): self.remain_resources = deepcopy(self.uid_to_capacity) bt.logging.debug(f"resource is restored. remain_resources = {self.remain_resources}") + def get_remaining_bandwidth(self, uid, provider, model): + if self.remain_resources.get(uid): + if self.remain_resources.get(uid).get(provider): + return self.remain_resources.get(uid).get(provider).get(model) + + def update_remain_capacity_based_on_new_capacity(self, new_uid_to_capacity): for uid, capacity in new_uid_to_capacity.items(): for provider, model_to_cap in capacity.items(): for model, cap in model_to_cap.items(): - if self.remain_resources.get(uid).get(provider).get(model) is None: + if self.get_remaining_bandwidth(uid, provider, model) is None: self.remain_resources[uid][provider][model] = cap + utils.update_nested_dict(self.remain_resources, keys=[uid, provider, model], value=cap) else: diff = self.uid_to_capacity[uid][provider][model] - cap if diff: diff --git a/validators/utils.py b/validators/utils.py index 74a4be3c..f1b55302 100644 --- a/validators/utils.py +++ b/validators/utils.py @@ -11,7 +11,7 @@ from functools import wraps import traceback -from cortext import ImageResponse, ALL_SYNAPSE_TYPE, REDIS_RESULT_STREAM +from cortext import ImageResponse, ALL_SYNAPSE_TYPE from validators.services.cache import QueryResponseCache @@ -150,3 +150,19 @@ def find_positive_values(data: dict): positive_values[key] = value return positive_values + + +def update_nested_dict(data, keys, value): + """ + Updates the value in the nested dictionary or creates the key path if it doesn't exist. + + :param data: The dictionary to update. + :param keys: A list of keys representing the path in the nested dictionary. + :param value: The value to set at the specified key path. 
+ """ + if len(keys) == 1: + data[keys[0]] = value + else: + if keys[0] not in data or not isinstance(data[keys[0]], dict): + data[keys[0]] = {} + update_nested_dict(data[keys[0]], keys[1:], value) diff --git a/validators/weight_setter.py b/validators/weight_setter.py index 8dd0ce4c..0c8f1ce2 100644 --- a/validators/weight_setter.py +++ b/validators/weight_setter.py @@ -147,8 +147,6 @@ async def handle_response(responses): if isinstance(chunk, str): response_text += chunk bt.logging.trace(f"Streamed text: {chunk}") - bt.logging.debug(f"{len(response_text)} for uid {uid}") - # Store the query and response in the shared database async with self.lock: @@ -162,7 +160,6 @@ async def handle_response(responses): metagraph=self.metagraph) }) - axon = self.metagraph.axons[uid] responses = await self.dendrite( axons=[axon], @@ -174,6 +171,7 @@ async def handle_response(responses): await handle_response(responses) else: pass + async def create_query_syns_for_remaining_bandwidth(self): query_tasks = [] for uid, provider_to_cap in self.task_mgr.remain_resources.items(): @@ -243,8 +241,6 @@ async def get_available_uids(self): # Create a dictionary of UID to axon info for active UIDs available_uids = {uid: axon_info for uid, axon_info in zip(tasks.keys(), results) if axon_info is not None} - bt.logging.info(f"Available UIDs: {list(available_uids.keys())}") - return available_uids async def check_uid(self, axon, uid): @@ -427,7 +423,6 @@ async def handle_response(responses): await send({"type": "http.response.body", "body": b'', "more_body": False}) - axon = self.metagraph.axons[uid] await self.dendrite.aclose_session() responses = await self.dendrite( @@ -439,7 +434,6 @@ async def handle_response(responses): ) return await handle_response(responses) - token_streamer = partial(_prompt, synapse) return synapse.create_streaming_response(token_streamer) From c709454066b80a4dd251cfa78b8b1268cd5a06f4 Mon Sep 17 00:00:00 2001 From: acer-king Date: Sat, 28 Sep 2024 12:14:33 -0700 Subject: [PATCH 61/80] add more clear score log for each miner model --- validators/services/validators/base_validator.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/validators/services/validators/base_validator.py b/validators/services/validators/base_validator.py index 89ec58a2..e095be22 100644 --- a/validators/services/validators/base_validator.py +++ b/validators/services/validators/base_validator.py @@ -1,5 +1,6 @@ from abc import abstractmethod import asyncio +import json from collections import defaultdict import random @@ -143,7 +144,7 @@ def get_uid_to_scores_dict(self, uid_to_query_resps, scored_responses: tuple[flo uid_model_to_scores_dict[uid][model] = weighted_score bt.logging.debug(f""" score details for all miners: - {uid_model_to_scores_dict} + {json.dumps(uid_model_to_scores_dict, indent=4)} """) if not len(uid_scores_dict): From 6caf06a844852eb3f4f8424f7c2e77de6dda87c5 Mon Sep 17 00:00:00 2001 From: acer-king Date: Mon, 30 Sep 2024 08:58:24 -0700 Subject: [PATCH 62/80] limit max capacity of miner to for each uid provider and model and batch processing for synthetic queries --- validators/utils.py | 8 ++++++++ validators/weight_setter.py | 10 +++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/validators/utils.py b/validators/utils.py index f1b55302..9cd756eb 100644 --- a/validators/utils.py +++ b/validators/utils.py @@ -166,3 +166,11 @@ def update_nested_dict(data, keys, value): if keys[0] not in data or not isinstance(data[keys[0]], dict): data[keys[0]] = {} 
update_nested_dict(data[keys[0]], keys[1:], value)
+
+
+def setup_max_capacity(item):
+    for key, value in item.items():
+        if isinstance(value, dict):  # If the value is another dictionary, recurse
+            setup_max_capacity(value)
+        elif isinstance(value, (int, float)):  # If the value is a number, cap it at 100
+            item[key] = min(value, 100)
diff --git a/validators/weight_setter.py b/validators/weight_setter.py
index 0c8f1ce2..df098713 100644
--- a/validators/weight_setter.py
+++ b/validators/weight_setter.py
@@ -18,7 +18,7 @@
 from cortext.metaclasses import ValidatorRegistryMeta
 from validators.services import CapacityService, BaseValidator, TextValidator, ImageValidator
 from validators.services.cache import QueryResponseCache
-from validators.utils import error_handler
+from validators.utils import error_handler, setup_max_capacity
 from validators.task_manager import TaskMgr
 
 scoring_organic_timeout = 60
@@ -208,8 +208,10 @@ async def perform_synthetic_queries(self):
 
         # restore capacities immediately after synthetic query consuming all bandwidth.
         self.task_mgr.restore_capacities_for_all_miners()
-        await self.dendrite.aclose_session()
-        await asyncio.gather(*synthetic_tasks)
+        batch_size = 30
+        for batched_queries in [synthetic_tasks[i:i + batch_size] for i in range(0, len(synthetic_tasks), batch_size)]:
+            await self.dendrite.aclose_session()
+            await asyncio.gather(*batched_queries)
 
         self.synthetic_task_done = True
         bt.logging.info(

From f2d194a2e4e7259c58e59ed2f4d6b4a271f89cfe Mon Sep 17 00:00:00 2001
From: acer-king
Date: Tue, 1 Oct 2024 03:00:07 -0700
Subject: [PATCH 63/80] send synthetic queries at the end of each 36-block cycle

---
 validators/weight_setter.py | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/validators/weight_setter.py b/validators/weight_setter.py
index df098713..c20b8975 100644
--- a/validators/weight_setter.py
+++ b/validators/weight_setter.py
@@ -28,6 +28,7 @@ class WeightSetter:
     def __init__(self, config, cache: QueryResponseCache, loop=None):
         # Cache object using sqlite3.
+        self.next_block_to_wait = None
         self.synthetic_task_done = False
         self.task_mgr: TaskMgr = None
         self.in_cache_processing = False
@@ -73,6 +74,7 @@ def __init__(self, config, cache: QueryResponseCache, loop=None):
 
         # initialize uid and capacities.
         asyncio.run(self.initialize_uids_and_capacities())
+        self.set_up_next_block_to_wait()
         # Set up async tasks
         self.thread_executor = concurrent.futures.ThreadPoolExecutor(thread_name_prefix='asyncio')
         self.loop.create_task(self.consume_organic_queries())
@@ -187,12 +189,25 @@ async def create_query_syns_for_remaining_bandwidth(self):
         query_synapses = await asyncio.gather(*query_tasks)
         return query_synapses
 
+    def set_up_next_block_to_wait(self):
+        # score all miners based on uid. 
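# A condensed sketch of the cycle scheduling this patch introduces (and the
# next one adjusts): remember a target block roughly 36 blocks ahead, treat
# the cycle as finished once the chain reaches it, then advance the target.
# get_block is a stand-in for the substrate 'System/Number' query.
class CycleClock:
    def __init__(self, get_block, cycle_blocks: int = 36):
        self.get_block = get_block
        self.cycle_blocks = cycle_blocks
        self.next_block = get_block() + cycle_blocks

    def is_cycle_end(self) -> bool:
        return self.get_block() >= self.next_block

    def schedule_next(self):
        # advance from the previous target rather than from "now", so cycles
        # stay aligned even when one of them overruns slightly
        self.next_block += self.cycle_blocks

block = 100
clock = CycleClock(lambda: block)
assert not clock.is_cycle_end()
block = 136
assert clock.is_cycle_end()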
+ current_block = self.node_query('System', 'Number', []) + next_block = current_block + 36 + self.next_block_to_wait = next_block + + def is_cycle_end(self): + current_block = self.node_query('System', 'Number', []) + if current_block >= self.next_block_to_wait: + return True + else: + return False + async def perform_synthetic_queries(self): while True: # wait for MIN_REQUEST_PERIOD minutes. - await asyncio.sleep(cortext.REQUEST_PERIOD * 60) - current_block = self.node_query('System', 'Number', []) - bt.logging.info(f"start processing synthetic queries at block {current_block} at time {time.time()}") + if not self.is_cycle_end(): + await asyncio.sleep(12) + continue start_time = time.time() # don't process any organic query while processing synthetic queries. synthetic_tasks = [] @@ -209,7 +224,8 @@ async def perform_synthetic_queries(self): self.task_mgr.restore_capacities_for_all_miners() batch_size = 30 - for batched_queries in [synthetic_tasks[i:i + batch_size] for i in range(0, len(synthetic_tasks), batch_size)]: + for batched_queries in [synthetic_tasks[i:i + batch_size] for i in + range(0, len(synthetic_tasks), batch_size)]: await self.dendrite.aclose_session() await asyncio.gather(*batched_queries) From f2d194a2e4e7259c58e59ed2f4d6b4a271f89cfe Mon Sep 17 00:00:00 2001 From: acer-king Date: Tue, 1 Oct 2024 03:16:05 -0700 Subject: [PATCH 64/80] update cycle blocks --- validators/weight_setter.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/validators/weight_setter.py b/validators/weight_setter.py index c20b8975..4078fb75 100644 --- a/validators/weight_setter.py +++ b/validators/weight_setter.py @@ -191,12 +191,16 @@ async def create_query_syns_for_remaining_bandwidth(self): def set_up_next_block_to_wait(self): # score all miners based on uid. - current_block = self.node_query('System', 'Number', []) + if self.next_block_to_wait: + current_block = self.next_block_to_wait + else: + current_block = self.node_query('System', 'Number', []) next_block = current_block + 36 self.next_block_to_wait = next_block def is_cycle_end(self): current_block = self.node_query('System', 'Number', []) + bt.logging.info(current_block, self.next_block_to_wait) if current_block >= self.next_block_to_wait: return True else: @@ -208,6 +212,7 @@ async def perform_synthetic_queries(self): if not self.is_cycle_end(): await asyncio.sleep(12) continue + self.set_up_next_block_to_wait() start_time = time.time() # don't process any organic query while processing synthetic queries. 
synthetic_tasks = [] From ea42d2dd1064cd14b8b92316a8d1928e1c0d4c56 Mon Sep 17 00:00:00 2001 From: acer-king Date: Tue, 1 Oct 2024 07:59:35 -0700 Subject: [PATCH 65/80] suffle queries --- validators/weight_setter.py | 1 + 1 file changed, 1 insertion(+) diff --git a/validators/weight_setter.py b/validators/weight_setter.py index 4078fb75..df8db60e 100644 --- a/validators/weight_setter.py +++ b/validators/weight_setter.py @@ -187,6 +187,7 @@ async def create_query_syns_for_remaining_bandwidth(self): else: continue query_synapses = await asyncio.gather(*query_tasks) + random.shuffle(query_synapses) return query_synapses def set_up_next_block_to_wait(self): From 17bd507a028ab9fddcca51f007b6e8853cc48bc3 Mon Sep 17 00:00:00 2001 From: acer-king Date: Tue, 1 Oct 2024 08:03:09 -0700 Subject: [PATCH 66/80] clean code --- cortext/__init__.py | 2 -- cortext/constants.py | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/cortext/__init__.py b/cortext/__init__.py index 6cf146a3..be970b4d 100644 --- a/cortext/__init__.py +++ b/cortext/__init__.py @@ -52,8 +52,6 @@ IMAGE_BLACKLIST_STAKE = 5000 EMBEDDING_BLACKLIST_STAKE = 5000 ISALIVE_BLACKLIST_STAKE = min(PROMPT_BLACKLIST_STAKE, IMAGE_BLACKLIST_STAKE, EMBEDDING_BLACKLIST_STAKE) -REQUEST_PERIOD = .33 -MAX_REQUESTS = 20 # must have the test_key whitelisted to avoid a global blacklist testnet_key = ["5EhEZN6soubtKJm8RN7ANx9FGZ2JezxBUFxr45cdsHtDp3Uk"] test_key = ["5DcRHcCwD33YsHfj4PX5j2evWLniR1wSWeNmpf5RXaspQT6t"] diff --git a/cortext/constants.py b/cortext/constants.py index 6abb4097..ebc6e9a3 100644 --- a/cortext/constants.py +++ b/cortext/constants.py @@ -11,7 +11,7 @@ # from https://openai.com/api/pricing/ "OpenAI": { "gpt-4o": 15.00, - # "gpt-4o-mini": 0.600, + "gpt-4o-mini": 0.600, # "gpt-3.5-turbo": 2.00, # "o1-preview": 60.00, # "o1-mini": 12.00, @@ -51,7 +51,7 @@ bandwidth_to_model = { "OpenAI": { "gpt-4o": 2, - # "gpt-4o-mini": 1, + "gpt-4o-mini": 2, # "gpt-3.5-turbo": 1, # "o1-preview": 1, # "o1-mini": 1, From 2155fbcef6c4158017e6fbf7519d23a7b55dec4c Mon Sep 17 00:00:00 2001 From: acer-king Date: Tue, 1 Oct 2024 08:10:07 -0700 Subject: [PATCH 67/80] remove old cache data process --- validators/services/capacity.py | 7 ------- validators/weight_setter.py | 34 +-------------------------------- 2 files changed, 1 insertion(+), 40 deletions(-) diff --git a/validators/services/capacity.py b/validators/services/capacity.py index 318b6e70..6b3e2ed4 100644 --- a/validators/services/capacity.py +++ b/validators/services/capacity.py @@ -3,7 +3,6 @@ from typing import List from cortext.protocol import Bandwidth -from cortext import REQUEST_PERIOD import bittensor as bt @@ -14,7 +13,6 @@ def __init__(self, metagraph, dendrite): self.timeout = 4 self.uid_to_capacity = {} self.remain_uid_to_capacity = {} - self.epoch_len = REQUEST_PERIOD async def query_capacity_to_miners(self, available_uids): capacity_query_tasks = [] @@ -37,8 +35,3 @@ async def query_capacity_to_miners(self, available_uids): uid_to_capacity[uid] = resp.bandwidth_rpm self.uid_to_capacity = deepcopy(uid_to_capacity) return uid_to_capacity - - async def refresh_capacity_per_epoch(self): - while True: - self.remain_uid_to_capacity = deepcopy(self.uid_to_capacity) - await asyncio.sleep(self.epoch_len * 60) diff --git a/validators/weight_setter.py b/validators/weight_setter.py index df8db60e..d9496f05 100644 --- a/validators/weight_setter.py +++ b/validators/weight_setter.py @@ -209,7 +209,6 @@ def is_cycle_end(self): async def perform_synthetic_queries(self): while True: - # 
             if not self.is_cycle_end():
                 await asyncio.sleep(12)
                 continue
             self.set_up_next_block_to_wait()
             start_time = time.time()
             # don't process any organic query while processing synthetic queries.
@@ -518,7 +517,7 @@ def get_scoring_tasks_from_query_responses(self, queries_to_process):
     async def process_queries_from_database(self):
         while True:
             await asyncio.sleep(1)  # Adjust the sleep time as needed
-            # accumulate all query results for MIN_REQUEST_PERIOD
+            # accumulate all query results for 36 blocks
             if not self.synthetic_task_done or not self.is_epoch_end():
                 bt.logging.trace("no data in query_database. so continue...")
                 continue
@@ -554,37 +553,6 @@ def batch_list_of_all_uids(self):
             batched_list.append(uids[i:i + batch_size])
         return batched_list

-    @error_handler
-    async def process_queries_from_cache_database(self):
-        # await self.initialize_uids_and_capacities()
-        tasks = []
-        for uid in self.uid_to_capacity.keys():
-            for provider, model_to_bandwidth in self.uid_to_capacity.get(uid).items():
-                for model, bandwidth in model_to_bandwidth.items():
-                    vali = self.choose_validator_from_model(model)
-                    questions_answers: List[Tuple[str, str]] = self.cache.get_all_question_to_answers(provider, model)
-                    if not questions_answers:
-                        continue
-                    # select one of questions_answers
-                    query, answer = random.choice(questions_answers)
-                    query_syn = vali.get_synapse_from_json(query)
-                    tasks.append(self.score_miners_based_cached_answer(vali, query_syn, answer))
-
-        # process tasks in batch_size to not exceed max request per 2min.
-        batched_task_list = []
-        for i in range(0, len(tasks), cortext.MAX_REQUESTS):
-            batched_task_list.append(tasks[i:i + cortext.MAX_REQUESTS])
-        for batch_tasks in batched_task_list:
-            start_time = time.time()
-            await asyncio.gather(*batch_tasks)
-            passed_time = time.time() - start_time
-            sleep_time = max(cortext.REQUEST_PERIOD * 60 - passed_time, 1)
-            bt.logging.debug(f"wait time {sleep_time} to not exceed max_request {cortext.MAX_REQUESTS} in 2min")
-            await asyncio.sleep(sleep_time)
-
-        bt.logging.info("Successfully complete scoring for all miners with cached data and "
-                        f"total score is {self.total_scores}")
-
     async def score_miners_based_cached_answer(self, vali, query, answer):
         total_query_resps = []
         provider = query.provider

From 45cbf156185afbd86849bfcaf42a3cdb05491955 Mon Sep 17 00:00:00 2001
From: acer-king
Date: Tue, 1 Oct 2024 10:57:07 -0700
Subject: [PATCH 68/80] add README mining steps and fix some issues

---
 .gitignore           |   3 ++-
 README.md            |   6 ++++--
 cache.db             | Bin 16384 -> 0 bytes
 cortext/constants.py |   2 --
 4 files changed, 6 insertions(+), 5 deletions(-)
 delete mode 100644 cache.db

diff --git a/.gitignore b/.gitignore
index 6572d92f..9cca6664 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,4 +15,5 @@ validators/.ipynb_checkpoints/
 **/.env
 **/Cortex.t.egg-info
 **/test.ipynb
-.env
\ No newline at end of file
+.env
+**/server/**/*.py
\ No newline at end of file

diff --git a/README.md b/README.md
index 180ada86..0d3b7980 100644
--- a/README.md
+++ b/README.md
@@ -218,9 +218,11 @@
 After installing it, copy `env.example` to `.env` and substitute
 all env vars with values appropriate for your accounts.

 ## Mining
-
+### Step 1
+Open `cortext/constants.py` and set the `bandwidth_to_model` values to match the rate limits of your API keys, as in the example below.
+Only three models are currently supported: `gpt-4o`, `claude-3-5-sonnet-20240620`, and `llama-3.1-70b-versatile`; do not add entries for any other model.
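+For example (a sketch; the numbers are illustrative, so set them to whatever your keys can actually sustain):
+```python
+bandwidth_to_model = {
+    "OpenAI": {
+        "gpt-4o": 2,  # max concurrent requests for this model
+    },
+    # add blocks here for the other supported providers and models
+}
+```
+
+### Step 2
 You can launch your miners via python3 using the following command.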
- ```bash bash start_miner.sh ``` diff --git a/cache.db b/cache.db deleted file mode 100644 index b0c0413f7cb88861ade9d8dbe293f358da3ff39b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16384 zcmeI#O-sWt7{Kv#8%_p$xk#@$IvB$6EI74?;hZ{q*bpprjaXdQy>xRPhbK>dGCzw~ zkG8>z)8Pjg{|Azg=1H6UnoF)OTYjYET{sEcNbZY@Fbr`hr4S+$mt0)Z_}w?sfq&YJ z*qgr;%O4_JdKIPj@>|>x2q1s}0tg_000IagfB*sr++_1+-87blKlIeX_ZH4*63%^3 zO`IU~)S&P7?^W)&V>N6`HrppwSN=_s=WVGIHM3 Date: Wed, 2 Oct 2024 02:31:10 -0700 Subject: [PATCH 69/80] fix issue for invalid provider and model --- validators/services/validators/base_validator.py | 6 ++++-- validators/utils.py | 5 +++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/validators/services/validators/base_validator.py b/validators/services/validators/base_validator.py index e095be22..e19caca2 100644 --- a/validators/services/validators/base_validator.py +++ b/validators/services/validators/base_validator.py @@ -9,7 +9,7 @@ import bittensor as bt from cortext.metaclasses import ValidatorRegistryMeta -from validators.utils import error_handler +from validators.utils import error_handler, get_bandwidth from cortext.constants import TEXT_VALI_MODELS_WEIGHTS dataset = None @@ -135,7 +135,9 @@ def get_uid_to_scores_dict(self, uid_to_query_resps, scored_responses: tuple[flo if model_weight is None: bt.logging.debug(f"not weight found for this provider {provider} and model {model}") model_weight = 0 - band_width = uid_to_capacity.get(uid).get(f"{provider}").get(f"{model}") + + band_width = get_bandwidth(uid_to_capacity, uid, provider, model) + if band_width is None: bt.logging.debug(f"no band_width found for this uid {uid}") band_width = 1 diff --git a/validators/utils.py b/validators/utils.py index 9cd756eb..60c7d2cd 100644 --- a/validators/utils.py +++ b/validators/utils.py @@ -174,3 +174,8 @@ def setup_max_capacity(item): setup_max_capacity(value) elif isinstance(value, (int, float)): # If the value is a number, increment by 5 item[key] = min(value, 100) + + +def get_bandwidth(data, uid, provider, model): + value = data.get(uid, {}).get(provider, {}).get(model, 0) + return value From 8f6d34faa4dd40c1a2da0d522f3c584cf3a50f36 Mon Sep 17 00:00:00 2001 From: acer-king Date: Wed, 2 Oct 2024 06:30:35 -0700 Subject: [PATCH 70/80] send max 100 request for each miner per each interation --- validators/weight_setter.py | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/validators/weight_setter.py b/validators/weight_setter.py index d9496f05..ebde2c64 100644 --- a/validators/weight_setter.py +++ b/validators/weight_setter.py @@ -5,6 +5,7 @@ import time from black.trans import defaultdict +from click.core import batch from substrateinterface import SubstrateInterface from functools import partial from typing import Tuple, List @@ -221,24 +222,43 @@ async def perform_synthetic_queries(self): query_synapses = await self.create_query_syns_for_remaining_bandwidth() for query_syn in query_synapses: uid = self.task_mgr.assign_task(query_syn) - synthetic_tasks.append(self.query_miner(uid, query_syn)) + synthetic_tasks.append((uid, self.query_miner(uid, query_syn))) bt.logging.debug(f"{time.time() - start_time} elapsed for creating and submitting synthetic queries.") # restore capacities immediately after synthetic query consuming all bandwidth. 
self.task_mgr.restore_capacities_for_all_miners()

-            batch_size = 30
-            for batched_queries in [synthetic_tasks[i:i + batch_size] for i in
-                                    range(0, len(synthetic_tasks), batch_size)]:
+            batched_tasks, remain_tasks = self.pop_synthetic_tasks_max_100_per_miner(synthetic_tasks)
+            while batched_tasks:
+                start_time = time.time()
                 await self.dendrite.aclose_session()
-                await asyncio.gather(*batched_queries)
+                await asyncio.gather(*batched_tasks)
+                batched_tasks, remain_tasks = self.pop_synthetic_tasks_max_100_per_miner(remain_tasks)

             self.synthetic_task_done = True
             bt.logging.info(
                 f"synthetic queries has been processed successfully."
                 f"total queries are {len(query_synapses)}")

+    def pop_synthetic_tasks_max_100_per_miner(self, synthetic_tasks):
+        batch_size = 300
+        max_query_cnt_per_miner = 100
+        batch_tasks = []
+        remain_tasks = []
+        uid_to_task_cnt = defaultdict(int)
+        for idx, (uid, synthetic_task) in enumerate(synthetic_tasks):
+            # once the batch is full, keep every unexamined task for the
+            # next round instead of silently dropping it.
+            if len(batch_tasks) >= batch_size:
+                remain_tasks.extend(synthetic_tasks[idx:])
+                break
+            if uid_to_task_cnt[uid] < max_query_cnt_per_miner:
+                batch_tasks.append(synthetic_task)
+                uid_to_task_cnt[uid] += 1
+            else:
+                # this miner is already at its per-batch cap; retry it later
+                remain_tasks.append((uid, synthetic_task))
+        return batch_tasks, remain_tasks
+
     def choose_validator_from_model(self, model):
         text_validator = ValidatorRegistryMeta.get_class('TextValidator')(config=self.config,
                                                                           metagraph=self.metagraph)
         # image_validator = ValidatorRegistryMeta.get_class('ImageValidator')(config=self.config,
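The per-miner batching introduced above can be read in isolation as the sketch below. `pop_batch` mirrors `pop_synthetic_tasks_max_100_per_miner` with simplified names, and the `(uid, task)` tuples stand in for the validator's `(uid, query_task)` pairs: each call drains at most one batch, capped per miner, and hands everything else back for the next round.

```python
from collections import defaultdict

def pop_batch(tasks, batch_size=300, max_per_miner=100):
    # tasks: list of (uid, task). Returns one batch plus the leftovers
    # that should be retried in the next round.
    batch, remain, per_uid = [], [], defaultdict(int)
    for idx, (uid, task) in enumerate(tasks):
        if len(batch) >= batch_size:
            remain.extend(tasks[idx:])  # keep unexamined tasks, don't drop them
            break
        if per_uid[uid] < max_per_miner:
            batch.append(task)
            per_uid[uid] += 1
        else:
            remain.append((uid, task))
    return batch, remain

# Five tasks for one miner with a per-round cap of two:
tasks = [(7, f"task-{i}") for i in range(5)]
first, rest = pop_batch(tasks, max_per_miner=2)
assert len(first) == 2 and len(rest) == 3
```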
From fb730b7901919e8b414dc939bc7237e5526e5ef3 Mon Sep 17 00:00:00 2001
From: acer-king
Date: Wed, 2 Oct 2024 07:14:19 -0700
Subject: [PATCH 71/80] limit max_capacity to 50 to ease validator performance issues

---
 validators/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/validators/utils.py b/validators/utils.py
index 60c7d2cd..94501282 100644
--- a/validators/utils.py
+++ b/validators/utils.py
@@ -173,7 +173,7 @@ def setup_max_capacity(item):
         if isinstance(value, dict):  # If the value is another dictionary, recurse
             setup_max_capacity(value)
         elif isinstance(value, (int, float)):  # If the value is a number, increment by 5
-            item[key] = min(value, 100)
+            item[key] = min(value, 50)

From adf0a8de08e3f736402869a21449865064c7e21c Mon Sep 17 00:00:00 2001
From: acer-king
Date: Wed, 2 Oct 2024 08:03:52 -0700
Subject: [PATCH 72/80] safely save bandwidth

---
 validators/task_manager.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/validators/task_manager.py b/validators/task_manager.py
index 705c824f..d58e3920 100644
--- a/validators/task_manager.py
+++ b/validators/task_manager.py
@@ -32,7 +32,6 @@ def update_remain_capacity_based_on_new_capacity(self, new_uid_to_capacity):
         for provider, model_to_cap in capacity.items():
             for model, cap in model_to_cap.items():
                 if self.get_remaining_bandwidth(uid, provider, model) is None:
-                    self.remain_resources[uid][provider][model] = cap
                     utils.update_nested_dict(self.remain_resources, keys=[uid, provider, model], value=cap)
                 else:
                     diff = self.uid_to_capacity[uid][provider][model] - cap

From 10434772d038f0bbc3908f1f910d317a477a7e6d Mon Sep 17 00:00:00 2001
From: acer-king
Date: Wed, 2 Oct 2024 08:38:40 -0700
Subject: [PATCH 73/80] bug fix: skip miners with empty capacity

---
 validators/task_manager.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/validators/task_manager.py b/validators/task_manager.py
index d58e3920..b1a39af4 100644
--- a/validators/task_manager.py
+++ b/validators/task_manager.py
@@ -28,7 +28,10 @@ def get_remaining_bandwidth(self, uid, provider, model):

     def update_remain_capacity_based_on_new_capacity(self, new_uid_to_capacity):
+        bt.logging.info(new_uid_to_capacity, "acerr")
         for uid, capacity in new_uid_to_capacity.items():
+            if not capacity:
+                continue
             for provider, model_to_cap in capacity.items():
                 for model, cap in model_to_cap.items():
                     if self.get_remaining_bandwidth(uid, provider, model) is None:

From 45d11cfde814130b836c93e7e2a45f663be4d937 Mon Sep 17 00:00:00 2001
From: acer-king
Date: Wed, 2 Oct 2024 08:41:11 -0700
Subject: [PATCH 74/80] clean code: remove debug logging

---
 validators/task_manager.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/validators/task_manager.py b/validators/task_manager.py
index b1a39af4..4111f4e7 100644
--- a/validators/task_manager.py
+++ b/validators/task_manager.py
@@ -28,7 +28,6 @@ def get_remaining_bandwidth(self, uid, provider, model):

     def update_remain_capacity_based_on_new_capacity(self, new_uid_to_capacity):
-        bt.logging.info(new_uid_to_capacity, "acerr")
         for uid, capacity in new_uid_to_capacity.items():
             if not capacity:
                 continue

From fdcd212988fe242e19476778ce661897a7eb2202 Mon Sep 17 00:00:00 2001
From: acer-king
Date: Wed, 2 Oct 2024 08:46:35 -0700
Subject: [PATCH 75/80] restore capacities when new miners join

---
 validators/task_manager.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/validators/task_manager.py b/validators/task_manager.py
index 4111f4e7..d2a10e33 100644
--- a/validators/task_manager.py
+++ b/validators/task_manager.py
@@ -40,7 +40,9 @@ def update_remain_capacity_based_on_new_capacity(self, new_uid_to_capacity):
                     if diff:
                         bt.logging.debug(f"diff {diff} found in {uid}, {provider}, {model}")
                         self.remain_resources[uid][provider][model] -= diff
+
         bt.logging.debug(f"remain_resources after epoch = {self.remain_resources}")
+        self.uid_to_capacity = deepcopy(self.remain_resources)

From 38fb88d4642ebe49e07079d62e082786bfdc2d90 Mon Sep 17 00:00:00 2001
From: acer-king
Date: Wed, 2 Oct 2024 08:57:19 -0700
Subject: [PATCH 76/80] limit bandwidth per miner to 50

---
 validators/utils.py         | 2 ++
 validators/weight_setter.py | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/validators/utils.py b/validators/utils.py
index 94501282..6f2da920 100644
--- a/validators/utils.py
+++ b/validators/utils.py
@@ -177,5 +177,7 @@ def setup_max_capacity(item):

 def get_bandwidth(data, uid, provider, model):
+    if data is 
None: + return 0 value = data.get(uid, {}).get(provider, {}).get(model, 0) return value diff --git a/validators/weight_setter.py b/validators/weight_setter.py index ebde2c64..6303563c 100644 --- a/validators/weight_setter.py +++ b/validators/weight_setter.py @@ -243,7 +243,7 @@ async def perform_synthetic_queries(self): def pop_synthetic_tasks_max_100_per_miner(self, synthetic_tasks): batch_size = 300 - max_query_cnt_per_miner = 100 + max_query_cnt_per_miner = 50 batch_tasks = [] remain_tasks = [] uid_to_task_cnt = defaultdict(int) From 258124ebf9f0b088e8e4e2008c5cf73fbf8dc812 Mon Sep 17 00:00:00 2001 From: acer-king Date: Wed, 2 Oct 2024 09:28:43 -0700 Subject: [PATCH 77/80] bug fix upon miner detached --- validators/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validators/utils.py b/validators/utils.py index 6f2da920..62388ca8 100644 --- a/validators/utils.py +++ b/validators/utils.py @@ -179,5 +179,5 @@ def setup_max_capacity(item): def get_bandwidth(data, uid, provider, model): if data is None: return 0 - value = data.get(uid, {}).get(provider, {}).get(model, 0) + value = (data.get(uid, {}) or {}).get(provider, {}).get(model, 0) return value From 47583b288628fd49b1c62e07dfde09d4e4b86c9e Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Wed, 2 Oct 2024 18:42:50 +0000 Subject: [PATCH 78/80] fix organic axon --- cache.db | Bin 0 -> 94208 bytes miner/miner.py | 2 +- organic.py | 16 ++++++++-------- validators/weight_setter.py | 7 ++++--- 4 files changed, 13 insertions(+), 12 deletions(-) create mode 100644 cache.db diff --git a/cache.db b/cache.db new file mode 100644 index 0000000000000000000000000000000000000000..125bc8558cab94ab8a40291da3301c1eda0e64b6 GIT binary patch literal 94208 zcmeIbd#s%4ec#8`uC-e3g_0$CVe}78XcXRMdAM`GtYIizk}EM>AGcy`D4T zoSCDUIpk7?fI3MHxOO5ZZBz7*B1v7R0g9qPf!J1KGy(cYivVrWOOP5xiXt!&^ai6X zP!z}??dSXZJ&dmEhm*4Zd-=6zF_1<({w93WuFj=>H7e0C6 zx#uqY*;ebqg$u8T|M|!Me2l+e2tVte=Ndns_2cUou08sdS1J< z|Lp$kAh3hL4gxy}>>#j%zzzaC2<#xRgTM{~I|%F`@aLa@@%7H@&;8PK(|KNeIL$x& z;ApvcG|h|U2ZxKim}N!Utp~S$=0WT3z1w%L z-}~9tmu~&+)s1Jqeq5~9)5Y8#+x#k-uO1i6EnmbXZu!dAyyf%t^pJUy!=n~}c&}Q) zn{R&X(5;<;H|zs!?OP%IbJX9_YZ8JA9U=W=Rvhk2;F{&|6jWF-|5HKFa7gN z|IelW^V0u*>7QNtrpefI|%F`u!F!30y_xoAh3hL4gxy}>>#j%zzzaC z2<#y6Cl3Oj{WNR(^r!gilb87G)mQlIm5coK@=N^n(u@4{30eLxKCcfyd-2AF*M8&D z@oTTY{I_2Ixl3=o`m3+~{40B}{x`3D`{IB7%K!KBKfCxVuikq3vzPwfYyZo|8?UT* zVfSYTfgJ>P5ZFOr2Z0>~b`aP>;0GB3w_fr%#_a|9*(n-goaoX$k z%dE_cPB+imWirafqfxux9;8{)PWwsTOVaVM81>tqwNXZ8k`>)@*y(1SGVArzHt(h7 zc#ux`sb7pnd9Twg($09;X1rlJ>ZSRxD9f}n=#D3y&)6t~Q8wsz)5)M)rdi$|4+f*6 zm$i#tQ4W$$nigq)G)dB7F&TE#BpVJ#WxJD%CTZD8lm2k34d?xHA|ICX->_FM)qvaWybb8ML#Rc&alrBrN!uF8)Xb0uqBXwFzStZ-J;V5NeApH z0i^jL>rX~$2BLu^%-v41?f^anheCE)*L(i?S45GwhEjnXd1`LIlTNv9kokk_a)>GuYCr#%KXZiTYkYfVp zhedlb$^dUZXoEgKWTWK$NuG2^qfT0k+mIoTwuj?(+3O9HGVgSUqi#QMb2y#OxEOUu z!xE-7NPC@LKIvzJ`~@4O+v&BlvR^{wFq1Ysr8Dl1`sF0=6e)1%a^gU3l9Z!ie*nb0 z!(lH4CQv{p@8r+>jwVogZ&FTrNw*l~Vqd*cI?loHegQ#tSQNY~>kjf!!LOas2m~*g zs-1Kp&X3)M@4l2W1*?DzCs~>mWd?Z1qsahbg!`50kQo^io&fhOGp6g7kOJH6Cc}2p zG5i13Z(g|co0o3C_J6$gS6>^v_R6c@y!1a``b(F7(5=E zuMd`sqiL4SlH+KEpR*U((SnqYQ({S~)KihiYvp@0N7nX~!&t}PCve&!T*&DUf zy+_4zm8_?;;xnIm;YWY;x%ZEX`SsiTN9(=*;?t)-`Qc{`@~QJK=ac7s@Y1P8y!Na` zyn5cTUwPI@FQ1zH;(5n@=~-ic;?&qLo_FkzpL+F&o;BMG=N}u=sYPvs9*2l{teDqB|%NB=6$Lk{hAYJ57J~&8L2Yh+?GT$tV zuOCmBe4`IPD5u3NU-8|~U8Y81mFyR*%RkRk)h~SWik5Qh;B~c?tOVvK zl)KDZ(|I{NE&yYT!&n_3Qmc{{3^-jZueS2yFq!APFwa{@$IFbPX5Fo$WW6qy^VPN1 z7mH=_<14=BKhAG8<%r7f;tb!uc=5`WyNlIyJ!LDm?fl=qa^>R1&b8K+E4SwvAX^oA 
zSkRR#Z@0F=K~K@t0!SP$_d%n1cwTKTTC1ZX6LeafV1AsfSt%bD%SY3!SY2(+iX_+G z)(hT?(>bnb_sil@F+VP@wch2e;zMe1W*pq(Dfqi=WwXWlKrj3ApmeTrj*8@AD?eV^ zu%Pw9Vm8f_CjqH>`v4ZsS`P*E6JwlwH#~I+!5plvUF-@hUoIZU&9oLrM~mh9cn$!I z)idC;@vOlnJ?2~?TL{bczcvsRt$97e2Dmt3x`t!`d#M+|i}}8GS)tGzVgx-lfR*qy z2Rnb_Z?B5kj6sjr`2vvc7s;}92ylStw#jvZ8Bo_O{CF^ZsGl0ZJprJe0CXo=K7@RE z&=7dJI9!OBVJ90PI15~X)LRYcJf5x(Hea^=JKn7munu0yKH|JuaspY3L?EHK0?{7; zToz((V5|)rfp<3C1fNA&0`z>k$`;3SA@Tea76zyY-m|>5nkPrsL|$Jm_E8$Pkk`92 z$l;jp0F+_ySpX4@Xuog_e*ummDzld;wM*6L$g`ZxGk*IRw96ET6F4IpXY4 zYmEDPCW%d-{QaTG$0w)%GoUbHWsyA8* zVBT+Fgu%i&1FWm{Xlh-FScPb)wK_E)|Ca$W5O%E=FqB_8Hm=GXY!E68Ct!|LTza&sK12d zp0Td-jN<{!-wSvU^o*c%2+CQJVtXY-Gs(7+m2I_@bTmeYUhN_62mvJp0;Gb7-fRxka_V=_DwD4n) zIxjE;bK`wT1+l;Rz7&iNjWJ*X3!XVot9d|!(SciGt6SEEdVaoluU5$UHL!JWI^R!Y&^t$cxB{o-KH{tUoZPr;2M{ZJdR>LYgP*WD6W%#7}uaE1&=$P7BHg^UH?Lb(qreLCG3V zTBOD-mQtZ-PoS3Db202}21bh~orIpy`>E{D7v8Y)uC`KmbL%MB2(UpdW*&4J=4`$5 zq;(AXFCpf+tMX4ZCG~oR$)@^LIPbVvlQ$bB*Qmf|U=^D*nievJf{qY@a?(PzrEX|F zK^&rt75RQ~?J0)(_22vZfA{YH`WOG&cj;<++G~HR?cf`M-dgvH8LBIK2zKS8@dl=ueVrJSW*j985X@L*N;uZIXaqQxobGy#5Oo6 zW=FEQ_LKSa8@@rIr5Mh`fKO6OE4~AJtl^Cak2;*$NbSerXb;c*YASb87c|OJ4p)?a;F8cVP6CU zSZ{CJ!pmdnqmgMe9GuP7I&3Nstgw+-;|wDj*Osd^zH~F=oC-j5%wNvUcvULy1h5Pz z47Sd9f*MgV*89c2j9xpLlUv(P=Key(l2(|sA~#qXhARh@934zE5U+MHiW1%A%9eDk z!3TQg(HuD}Kg8TW364Q^ojTQF1Q8(3S-b9VzXBA|r540wO)0 z%A13CY`$+M>dd7AV$Tmex-IKTf>L?y;z;|wPDdb`{elE1w`z#$-^%ho2AQjk<9}vh z(a3~UjWL%Ck>uX-QOmfmFJMKFru^9CXo5Qu@Cbj9Hk-s&E{g)jd6M;&3xFeBX&kiV zDnYk2e@`|Y`d0U8rtwB-YeuL-9oF8AdOzD$6-(YIA^zj zsZ^5rzZHPlsGPo+L1X}7SE6D<3%z@TTH6|9Zf1;_K%2Cr|)HVfENA(R4wE7_|O z1{WvV`fczNqBEVs(DQw$VcELxD$zD15Ju~b;E{C7j5<-&rRt-o6$^Ena@_KRh{{>8 zZYw?KBra57M|H~k)vB5*hON*J%=EwortU$px4*y?dIxz@PUWh7 zx^jg{E(w#jT(I2rFop2nJTy%$Yn$p2R4ZqAW@jHt>vF>dPeQPmo=#@bJ5WVHd@c7p z`f(Cvwk@N^c>(x%xq@HIKK3))@VlQ8bcvxB_X^#bSsabshsm^ ze7nKyu@#%#zLqD5%0?i7P_0uPm%i3 zLyJjzJ%5BREI8nrz?Gb+mJCCxt=J^KBEeT#Q@(;qb~5|X0<38Gnk!l|!EC$(?UcnH zNOz8GAWkICCeMO=h>3(}b8*4KlOBN~wZ+Kum76?nWeBFF#alsA1gWeKQ}c|9^h~v~ z4KWk!s+Ne{tqjEdi2P+(re4-D8jC7)Ge1cyrm%_>e zzeqq|_S6R%jVk#)CR6x_A^)NNJYdQPnzR0NstkmfVSvg}{}$}K`hz)cwZ@yro>&mx zJx@wU@}cE3{Ha^F_{~R>it<(YUk(*r$QLrD(L|<)=4+9;{p9mP8T)`kxqPr*AFbYg z>#d{dhe?_|xyCBrA`1N0{+L3o;gGtt(J)Wi-8S`vgN*XAanjEdYV@e~>QNLqrp|3R zqO7XdRW0B+qXd$oyIz-CGOCL5PH#A*V2J|0oEk97e1>fb_1c5-gJHjY?Q2K-8`&=t z<`f2h6xlCT0^3Fk%-Pv&f0$V?4^Es3+P$Bc+5hHAD=;%rOOX;Np#{DZev;VUhxMcE zEuel}8HdtH=PNsvy_l!!!kMYhx?dh z3|2fMa|vfeAur3msguaCInvYyvc2K;AvSVRTaqzU$UWpUQX)0Kp92*s&LdwY;cP8;^zO{`pya1wH z8>--r9qh;WQcIkBK;nJ)C(a7OOT1t-trR89TOFDX9XTNV@+!g7BmUOj^(br_?6AkA zy{3fy=5qtPt%ElUaDb3U%xIB2bvea9QKLgOSU6l>*Ovjq5JQ+Qt=ntQpK=c4n_L`Y z>0>cY(b?_PHxB)43!sFCb11~}6=QDWNAfc=HAY)-^p6N;C&20+FP5|XT1C5OldA#` zKlR?V)~{wClNe5Z`Nog_=!?`ce^14WJ27lhUYV6UG5m~-P!q#3!U7kPHjEXmsKo&Y z4@-xhh~fy>je5>0m^;NVrF?qSDLRPaF7?&LD5K80kA%&-#h{Z6@}!gGG%=vGI33fi zU@)X#!Fb&3WGSW0gM30E@UTM}@gSioKyRF=n4WPvJqDPRS-aaSs5I{Oa%SmK?>`!L zOKOdWZAzlI3T|qUKQh7X`QjT=+mpxtXe2ag2~(w8N-#(`5}Q!kJG^Lx8D_qCNl}l7 z9m1^nkxZ!EbDxMH6ZM7LDF%&r2?t)wMdbVO(j+9NIM!!IWP*${`H3GR)RwSV#}WD3 zU6h(M2vg3L2(_H<%cqJ>wjldxwm{g4SNnTmln0>M>LJ-Tt1yL$&;-_ua|7|oR4l+; zOo1zdnAN`UQKt+Wmrwp4QEFth#2^m|spkttMW(c;N181Zk5VvSW+7f2$NlYi{}U~x z+?j4FC~t_us3A1;m21s<0Shnx-;e*z-}sT|;PQWY@Y|!${=|#lYjOFG%m0by@)-L+ zB>VqkAN$=4ufE7%yFWV!{3(LKzwyN%f>-_eOaE3Ojr4nL5VGS{f1-KSc^q0Ftz(Gn z3KIHTg{MGbOrken1zKL3G=?HOm``;fj0trrofJAiNmWjYyh!KNQd9co(8-4WS4o>r zSLm5Et{Tz6il#|Lo+bTKZEt9j)hC$hAmN?0%%(HAXNRx3|(3WSCp?A_A zrbid7geFonttz|eV9*}qD5c$Tw?7=E$zVJlCN%izr=7eUb^2Si)CnECJ}URpN35k{ zn9Gn@si+N?KXL6N!skR|QN8dW;Of{sb}6Xa&pjqf-8+^FS1QW8%q>u5DrB4he27S+ 
z7?Syhu=kbRspv#O`4t`_D!R}_qqnBD3}b>$j2RPl+$$yBFVqltij(KqDDH=~#8?c} zSTW8Kt0KBt9I2+IY`v=zK1Has%v!&e!izQyP$&e$^V3Wa@?c_>kRLVX3WzaPO2r;{ zl)gp^r8HhftjL5-t`^GPasxrt0;~m_fkokA(wP|9u-u-4yf@4v^~GdK_Nv82wOVsi zRkne-Z9n*a$RfhAljHosVIqiW0hOJ? z!fRM(ivw^1(QU3u1c6s`?pbR| zOF5L_q)H4q5-qi{Hi5v>;v+d$C#t#-e9&^`3gQ%Cc@R=@>@Y7&v9y?XD!+1E36e^H zz@WuW1{)e2z(_|Z3auQk_ZH=z6Mi^c(XNmPc-y>5#A2A|+K7lDH+FFny-9nJm-YmE zdH{W4`bI&E=kg(EPjCg3S-ipb&5E}$x{duiTAVa{fA_)b|Nd{zUy#}R)!)5^+55eA ziQB1?J5>^wJWSUL;h$RWev<;uWB7__+tSe?y{#z+Aqm~e4>wcVB~z7gv|0b&d$x*( zsrPZS|1bQl3m^Y?`0G#NpI?3QqQVZVU;cR+dEaA4${l6yDD%9Od4u5Ylf^MAC^|UG z4r+o_1r@GIF(o+(VdnKrF0S>pNtkn5b^{u74rshO#9kv)mFB|Pn69wnE}cl*6O6KU zFH^7Kve&0~aeqt`-QFch{C#TK)$xb9)Nt44-5j6*-J%7}5c!?y z3@`szIp=)}vOgr&O0lh#=AiSLij23NBE+Dvg_2hrT1evhwpTS!lL^Y;=|{ttLLyA5 zhhH5NbXXA>u|f4+2rr-vg)m0c@X!w}nG^F3dq)0*tXNucU?>5U!4y)nD@`?I^|!uA z$e$tI0z!W?{Z-L?tlFryMU}m3l0gt;NrqdQ9MAa&jDJjP4e4FHk&|IZE&zZbo@v-+J@$U%33&e&OSC@b&)npCt3E_dWIz z-tqn&?>{f^w~+IkCLtO86~hi}CU9pyz7qU53tWkt6q8kUa`f+gXF{M~Vr>lCqe)Iv z@<|Fy@3!fqpVG9PPdgp@cGEe2KsV|B*xG2*HoM4)F{@qn*js2LCurqNx9o)G>FsjV zp3t^E=_pX$9d(NW?;$=y8i~{2ewfj5n;!PPewT3fptD^FjN2bs5W31h*@#1*osRN{ z8=J0}kR{1XmEom!NDbcrDC3p%F}ce)xcMy9@rbq*azHt^w}d6Kn!G7Zg(-NT+!y#j zyxk((7M4!&-M!ty8K8O@c^2^7DZ5Ngg%n>@b|BWOOfO@(?nw4pQHyKuQKG?WuWJ!^)@&2M+3cI*3m zm8XoV@5k~J3Yd?kIy3ww$~e%`R-&Hjhq&D;y$`5X-iiuiwDah>Ww%uG-!#VacI#(R zM8$AuZf28}f?&m!-07t`=C1@JefiaKz^{^N~ttERFU^pEn#9ekD_oZnBn-!Vb zcSS3>kyHe)q_jD-vZ5Ucbx1bisob~VZ&mIbxp?l6ikXqCCxw+`B6x%T6=l2PnNbrMAK|a zZ#lr)tb`SHJiXE-)=#L@MAgoO`FIh;IaTKhPRE>^7~SzIj^?(g#yg;K4xY*cZ?Xs*(TYVa zW$LE73|v=DBYm^nbqn74INJMShLfG~)J#$Mb0o*UPi%#0sOVLNmbRj>3t*}{`guMo zy<^nm#lFiZbl~TV{4ZTXwfLh98zGXc(A}m_Nw>s_C8@@1~yn35FT$-U-<||HUS`&hbs)bxE zS8s+e*PVsQ$?Nho(>CFb9q*X{Bs)#rmZ}I4p$mJ>At`q?^jsNBkn=S3E)v$eGzPHd zN*A5j%9=T(Ia)d3fO%TG|Do^5GDS`6cHANu-nk0vDwq`8X3Hl&22696s-trc1%7S+Zw|H zHjDM$kCK$yzgmGf6J#k#?$)RwCl=YHRCFV?`+!6d2w}{Mr%iKpPq6WIL{%QSy9uls zPc@T)FBoMCqrw&q9pji*7!gQ0^mGm0hU%!SsUCUzTZnRe8g!=ye%7c!&KelqBRvzO zz_;+7a5yoHW{n<9#FaCrRihN12~`XdJ#~b#E?|b1MN(uLqvWBAXYecY`oKaO2uTo9 zMFtMn3L~Gj!P6MG1#DFp605fGb?R82H8{|-aOWL(vb4kZNPo4Yfj92F_omUqb!sl~ z(1?z26Q!G>A)7Mw#`W8Qloe<5Jt>H8LIkx*oGh8`(;H-c;JS><6~d3|bZNv&gwG{N zB~@xs@_-(*KQ7f@H77OLfWP*|nb38{n+R{Tu00RhO-`fTY4~kOHlMTtX0uoB=?Q_U z1#EB9qUB)vEMY+xIPlB({+tRm_X=0bapG|xRnh^{uA-Mw>!{Go!^sFAc-zw`_@tr9ZOHj@1u+vi-znF#fui6z4U{b;+Td&(FQ33ITg_IavbGF2l4Ca8xcDM~+RJkDD@eDO)ToYGXum2(H(Hs<9vo=CEy!$@ZKp})V8MBq4- zWi*161rjVA^JJWti=A=sr*Z3#o_*ck?7t@rs#JKH7G_PJX^(g`frngYuN2*?6^NLM zWB4N+q%!C*fNCoxAb;oxZXC$f@2E#dVWT9iGffRAt8LocN$zwFYGR}l9H1fSBjw^d zX((lC{aL9TMs65Yu45erX{zv$r0G?2!!-4$3b87AZ9E~0zYU(?YT0R)0MHzvBRqoI zujBG6$Y=s>4)=8F^^E%e-+J>uUVh5r|Ihzl7hd^S`D^!Q2Z0>~zRw}>S3miZf~ueU z^^V$Ue2>@7?t-ejpekC#xl-LMsEU))^DyvAm?tns6+Ta_V@1=R;QCXS+}6S}#Yt=< zBgMS=Pi?f(ozOLdONyzp&C9fy5cEujWpB*g;aqR6%bB?!del$3NVrSDbuj2~Ic}%m z@?XjzxE#8j5O}4|qSGC7*ReWu&~#(m=ME;WU{3N;e@HY8O; z?k?tn?0jn|l{=O{vV5A#?f=IcO0Dy3R$N44E{H=p)O4b%=cP#YNOnv)BZ%Z^K>m#? 
zduQZ6o1z_ZbA&9Of$yGAx>C@=VX1r>Y}qiTX{Q!ET45uaLQ5tvPZm{F)+bl67CV?e zz{L@xaJd3ku*?=mykl;CYk~WyAg|bc_QdB@giCA2X48G8(0FkGwkNO7Rb?zaP3`2+ z@E`D_jE2yxLm?b_fa{nU(NEmr+A6OR;HOZ?q9*;Pv5K($Gx>MpUFpkra!{o^TR)sHO>qDidNMtw+CT)@Ax z6VsBc4Tc=fm>2pgmN9#K4F^6^EVlC)J-QKL{mLE}IdJ;XZf$Oic7~K+;AMNjh#>_^ z%C92wmYrp%_x`4vRuWKr9(ukdj&TOy`#2#Ghbw1g`efcUcgHVD{G z2+(2Bc)Q=#@tjyS!BW!vj;TjkhtuUq@iWTBW(-XE9*0Fv(~YeM@9}9*e#ZE#EL99TKtC!uY{j4sb2w_?kMlkvN3O;xblgKq=h!8~7dm;2)cl-eHV!XYA zf+B{5Mqt|^IIA~fB(i~bVRwe9R6*&^CkrL%@x$b6q^x-}Iq2C7ob*M7JLhaI^4tIL z{vZ7O-#+?;vhQ#Ir(Y%e{(HTohLgOP zj(7Iy&OSY_eR|v6I*MrPwq&B+waW>Y`{!G?Nd;dhhZaAg^{6Hdca@yCGxenGt9W!U zqQ-RCrvj-=M!i9bv%8zot$Nr^x_kv*w?`>Wtm!C}PjHPF!{KvfVosoH4Y(-qpIgoFJk zwH^>*?4^@*(oM=V8z)(RoWlw`qhy$k>CM(Ii@cL1y&f&x%4|4+W%da%c82{V8>Gc( zhbiBum?8&l$TrSDAck`A0oPm2a_bCpIot}ey8QDzRsHhzGa6#wj<@mZzD@;hbY-S$ zJL^>S)*D~Cdwcb!YThi)?pd{dHI(X0PB^Wf-7Dr#^&PQa0#0`uaJpTk^hY=m>pBS{ zYuiXZp54V}a3nZ)NCAclC>3!G40@hEws@N4XJL@2qZrhdLZ;u??X`q!Mua&b{|Da|B`>d=k5RxH%7O45)_Pi`l1 zY%5#XUZE8(?u<2HH+8+lecdq1s|K%it-BeEC4n zT=UWnN;>k^J-a=N*Og$ueFhl91_m+~1vPf(7C%}oZak`~>Q0rTA1;UVP5hKDC9!Mx zH07%G#{HjqZxw4qVAQ%9w?1*pFgOgiY!dDF3p}m3$hWxKvr+~dP)Jf2-lWG|dyR_A z1uaC?tprIofO+mMVBjm3SlGE_XcxFe)sAHgD*P-#2btzfplewU7cpqNl9ngJyTsI7ft5 z{b{_s*jFMhP?s607=)AppzitX)wK8MHbki%lQz+Rb8B&u6{o7K8g{T9&(B5XDK$U` zS{ZAx%bgts?l$fsix934-uFb{e?}64j-G?!CnG*`Hmo9zQfyX6Ww$@<%#bH)Hl*!a z@6qbYU?=@JA2~E)1U71B85bcV!bd!+E(WfFEv{EDQ%ACi0wGnTCTj3bIUY?sVO7SB z-VM3`t?O0|kX3C)N&lugAhSz*d@0KKQGusma0X|k@;*?r&P_O|?R%-9;p$h4*&+kI z1X+c%tS*C$QhIA3tlIk+Ij}Ou70<5wd(dKPHl&A0Byq zum+&1_VA?;9|>fB@rHP$7Q&eR72mvHLJA&7(Xq6%6cg&m2--!VR{h zE0}IFES{tM$9!i0$ zDz(M*B7tnGr-M;<^ijEt{y1|eRg`%G5U5h>ctKYY%C0x6BePAY79YYoO#f+eGDEjm ztONY%E@L&upbio!@Itx`fSRaPX~D(zy6g^1M1CX}S3zMVe~`jq-cmy_6HtK{%LqBK zn9U=Gw`@6LsP9I#1=nwHz!TxC^wkKhQE;uUK{dEw*A+%Aw0a0M`w*>7FB?%9%R8*m;Fv0fDLB zRedufghp;?GbrxLhg?3SoY$9pH+f==3b*7#UnSl|W$v6uUj&uy%xgezK4ts2M&Cl5cuOxUpo;b` z^dk_~GBd(DbnP_ws#6Jhut-(T)?bhujtl0J&nCv-p#sqE?A5crW9U-28Bq;(z^Yk+ zl=pWq1X0s+nuZyj*#g9}glL{I>%(g)(Z}Q5MsM<|w$BUKHH02wwYmn_B>d_ayU`DR zjeZ%}THr^%i1(Wu-#KZ(KjZN_<^UA*SRzkS9GbR@Wv&MTy9sVDSLzZ_@%X5X0b=|J zY#NRIc6Bd!P5A+ghN7W(#$7*LI8Lp3GU+fZbrN zFYdu(!OD#z2!r1%KnXVP6}jF6OBSrW92L)V;NAgOX+@yfS)t=9{J3KH8!|zTg^Ee4 zOF+E9JIwp;6#lb!==Np&PM{RHB3L%bH(oy}uSR596}L71bbCHuJPIq^L|z-|7UI#o zYynXz>(E63oKejfqM$IMsWw|tj+3_lS;+&ll3Zct3ePaVu`#D$47dfiY&1m0JKU%m zI{Z^kKw~!R%6C#eG>=_+6H1DfiVG^ArgkN(ajKg3@Yq2t0lqcyS_7bULK8bReA&c5 ze}x7bi`fRDTHe07+CUntZfR(CUjz;}sFltU*ps##AVV$7;ogeIP+~d8y6y){BeN10 ze1|JUwCgtTBU-9a(H*tDhJF)uEk6UE{BsMI+mdL8&itexd((^D;&DV_ZQS_E)VLY$ zbELJtAG+SLFarxizKeb`x=hf;Im>D7d86F5wkZhCw|@KCDr>+ELAR)_jlS zqk!QUr-^NJt@DrEVSy*VZ?^+~qe8HN2YMZT=-u>~5!O_xTonavnbxeoig$7*Hh+$L zks~jYM*y2sLd!;ZLLgHSW1?1Gg7;!O7a^DPsR*kt1U+ICXW}VIXIB>vl+>`Po4AXq zdy=S0R460N#?zJqa8rf=t=t98#%!3@3*F}-`Qc46=gA{+8L`z zf}0`t527TBikbTvsr_NB%E4fwTq};uE9%E|7f>FCrcUNk2TX35Q&I3bs)SmFS*NVq zN+PRMRRxCz6t0?-(9y7fW_f?ZyvN%mUjkqahu2U{1@!Zl-u&ag`9Ix~{f{|w<;um2 zjrKr-=SF)VO6SlUHa5xnPz?}o;+S%t>ub8VK^VshP+@HfW#s?_(FuNNQ(&Tzy)}Kb z@AYYop-e&_2|NeU)noTk$JmMC67H?U_$nvh=U0tGMG{Wmm~(L?H##S9{k9xe&z?E- zT%cj2b=KB)xH|`7ZG<9-yK;5qifd}%-F0F3O3JeJ;Jbc1dTHo|7r!vBoaA4_u`Cd= zA@QwX2bgB;@4zI@S^eEbLTlxjqaOSarQ~>4y(dU2;wFp3Zf40TFb;yMzb?6hcyWoAgm_?=Fp{ znp-p34)3&B#U?4yc7&dkk~49!YHk8BK~keYN*4)Sf@lEN5Kar1%LG+HJgq`G$JL6q z%t@-S2HYMsCy-u6i8x=cT)B&xWdRlIX=JMqX3+@M>>9}9raLhoNRbh$e`&ANZQYnb zE=w98J0E(0b1Jqwp-@sYY6PgQBCqwv`*b-C?~@8AUT&)}epD0iq1r4#z*nyL;%0x+>DNaT46o5efOM)qJ(_!Cu55chE^W0K+Kz9M$-!38ttI=Ml;>YzB#oL6KI&O z`}0PQT6O@)VLk*tu!@f{YK&ql^zc%BX`#-OeGvE&k@MwVDg7hoFdf;XMcDnuO+2;l 
zB4lGc*m+0mrn;S)U0w*X+R3fxu#pjDtjO>!I;YgBXXT8{7gTrAii9slpbMzrk^-Zx z7;}tp@~U8)#$2bEDg2;i8$&{;bsY(!3Ut)4#baD~1J7GH{ru`G(<1E@;wPtI4@pq6 zi?D^$)(uwKDy{oiqv4xZ6dxfb30FcxC99+K8Gc)D!9Wu7Kq<3rh*-UiifuAYT7LuWG0CqF1-qEi&qME_?uh zb-!Fj8r+~Pv?`kmeTmWFfoBXsR0qW=^iizoI9%-Kdd_z06F@Mi)P_J)JafSfsPusf zssotg;@jDBJ&iaPlFnFd>y5x4BACwLyZGyki-a?ZTqM%GVM9FwY$U8*+hFaQ?!5bu zA5ctNFAyC+Nji{eturX|$|WXTak~g>C}GI&hA@T2yc1mn9Il`Y`85OoLtZpqY; z@o#AbCt;>%*n zL*O^xx}=(bpZa^cv1~NyO^RaB>ks?Abd+^^$#Brg(@{a+FiNkINtySG9z}!0L6P+G zr0fm2xM?&Q=d=^+j@!GMfL%=hmNIEC&%Q_Yy4fsIg(ADe{b*Gtdecs5`Y4dI`%Ccs zhyR1iuL*Ns{4f3mYFv9|K5P#uXX$fo*d*x%lxcrs$q>g_|wO$j`%iJi!`sdnGn!G8J(hMUdsDvR`_0p+5^*qJNgiavcUebDhT+xLZS?6xUVy8u z2l%XT8DDL^H=RE$^4na^^kDk%fu54)Z3b}Y%!uVNU!f&)+Q!HfOp@2lgKct`D7Vj@ zV7Lw}h0kqfYwVcH&0X1kGuZ<&9tc7C&ZJM9bz`};%`zPIbOg(k$!OOPVI_n@ljer2 zGr>Wqgl(M3KlWok)-nUoQwXlpdeAOxvaQ{dxrB>pk4Qyrn0*EF!P5dTAJ2j)ZaiGU z_2FnQpp?Mef?0p+?|lAMnDt*-{ev%2dRR^h`VsVcol)5D0Db$V}B7VXhTr`(B(n45bQQPj!vq4pQ7o_C4#)c2U-e+)?!5(@gl{E6DCTy-WX#Nu(9ip5)uyeiSU zHQz@iw+NqJxL3?9w4mInR!kWSZ2#tz*y|c`=%Nd+=qaWjuR+ZHQAF4wS%D$_GPcPz zK)9%@1Rt(B5(ha+7i&0R@gx(KsumNTw;(ZJ%@$9R*_xinzJ3B);TxU<X9aoNEwGK3b zm~O%86C@AI2wqP?-1h`n*tpf5DTd&={1&^3_X%nZs3JuEU1RifG*{#U#lxg7UC$Bw z+-3#A2eAiDYqS(xU1a!9iN(ofqK-_NPp`frfaR+jWtJEO@s-c35_c-%* zV8b1P!4o(Z*`XWhqc$vNgz|?LY|oeh*)(vt<8Y{k3_hdyUL>3dh(NeMtx)_Isv7aY zKef?Y4S-hUsA1^VZE+SM8*DP<5mubY_^d|V{GGS-uVFQy9EsDUQE1ko@~pc z2!jmIEgaWRxXRV>SG+dd(1sIbG)zHW{W}C>ZeM6y4k+wbBNJ1vPN#K42C=thKVTS#^FuQYmW|Sc35w-U#pr zUdyRV6DQ788k4185iGvLQ!{sP&U^1MZ}NSq>~

I^LukJkUTX+Ow3gsG&5PnTGgG&TvfNOa3YAx(hH8Pn8`kpQYS6D*pX zxo(Spf)5$8kV0l}d;tc4()Yv`elOe?VJH%x;4)c%!bDC)RHoWc!vbDdT|&je;F2*Z z7q)v^1~zfsE-k#ZXOu!$3sc&V*MfWi_5A}fx-3B(X0x^hTSD*#JE;$tbW4YuUCC~l z@y7GqxOWZJSS%OUyY|N#%_{>{D8bI%NW=k6a3dlE9E}}ue+5RHfe^07t=muld?F+h zN`MC}xFOPKL&km34n>IM=IWA(=&-R-YE(24E>zU^!J485N_jlD8E^%YrfR$E1JBEDf-oRI)m>!T#>CFJMAI9?d0^fcSCOyq0Zc(r z(NpMeAy%b^#u#$Vm=s)lUn4m<-MJ}$_Y4N%v+%y>F zmb*}{NeZMi6r#ri5&jZhWMrjnHcNvj$`iHAt7mFm6T6z(l^b;m;*+s>1{n$<-1bG6 zh1Dt7+K=8r6|o}#xgi5WaR&ZH%Dh+tYJ@Id;e;(J<4Rn(J3Z@r5x1ocyxB~-rUFdl_l?<>i_rqAOFvC+5c4aZMsi7z3$+na;g7u z7Tn*LjlY>gcPcgpa7gu+oq^Gw*N2MMP+2G-z6&2cgP1eu-6s zVIe~WgC{2DJHa-%kWJx>RPCiWjVX7YV&@A(3P>4Y7I+tSJ2b-on!~GPto#{5vdn+h zuwLZJ6A%(pbgGnGqcc4oIe1;$tg0j7E}DPU%^~7(h|io8g3Oz%SB)~TahTusj4$$~ z?EMBKe;WnAc9x^d7x06(TVF+d?4kHsvm2AzV&_!`kwe0GRPDlObVM9w!DaV0-PcZ@C3W8FJ!LgaN_undX|IS*kp$#ep<{_zu-tgsKpclsqX3nCx`VP6 z>gfgipFWZqhN@9C;r}Ioya|0*AHW1J4~Pa2+zGoaE=heEP^zY-&U>%*u9_xl zjxhb1$R@Q*&Kiw>#OlI0E3D)OrN$IJ;ik6m=N2i-1YS5ySR1`YA8@9`9+P&b1$>Ps z8?14gORG7M2EP}IrqL28_$`(~Blpf|K*Lw5w{fN0ta9j*pdcNRjAM)WQv%4zTW<;6 zn0R1i`>7}9C+VYg_V#XTf$0V-lr(nw&_|8+C6*0%6||$IXr>;hKikV)c5Y# z0fsEnz!bq6?=?6b3yu^BK3bCYUhC#`-=w4Sb5q~~wd06Y9RRY~ zIFkh1VvZq%%v5wqvW)}8uOFJDNJI1P3*v*A&2T2~`+?xY&r8(?^sh6y88v0+F;Cgw(zVOIo9Q`tQ!lo z0^a>5O)x_cBd3(VO0JmuTeCYCu;GKOVm6S#E0{1MM(2X-~a5-k+xnVLzw!WdQL-0vyE5;abQtmCp*-+t~eAi2tf3kb#X0=&gAQMxO!d@K%f!lQCc6`z})TT zJUOl9t~2e7S#{)Yv^vAsc~}Fe_KVj z(S*nQWDLr5Zk?0X*MNoKqXZuoZuJ)G3xq?^tW9Ir%YD*zym120SSL@hJ=bm%)4_30 zo4-ZF`)s_KS@3Ja%9&H>2K&cU3B4_zfqF~=+n(tHR23Mh%M|4di>e(>W$3v6(s1=O znV6q;WJlm>(0|Oipnj5i)O?b!I6qbLpb3Wt4d<=@|NUQh{qxrU|Ha;gm;MQV?f&c_u!F!30y_xo zAh3hLSrGWm&wYyI+i!KgeTU@RZihepF;x=%HsuoB`JxgDN+CM!te1^CqjphFy4)2u zNs4whOuM7B7x>7z&SX-SRPS`UX|Fxh4MiDvIT}xflYXaHjJcz3FqsVVe%YU7By@I? z-jKWNCc`4x-B9!P4pYyKsrb&GERGfPDVC(|Ddwz8PjGf95{fU5LJQnrB$?8Rp}4kx zP03dILhX3v>pNcC&u@NSbv5=pl{A+R)@!;GzV+78^ur`go?OF}dJB)iTl;02T{}<6=C@`n`6ilXW^Jw|Mo2DP?Wl0Z5+r(_S*lx=A`5 zwtHPJ?;7^$P zD8msejSAb}?-|%59Yl$F3Tcw!uMB4rTBr%atSy#wbWU-}(eTv}V3j;1zms$%bxM`P ztEJ9?dSr3vGpxAc;A)GEZ)+Hi`|rTwWAaj1Xw^Q!Wx%?WI_m8T)M!k7O?YT^ypJi3 zn}8;%j&0FVMPR|Rpjr6F$`p#RC@vYMS~1dMzy>nBuwDNjIP zatKv1b?PJN#kben8_@W3J}=gL-8MHq4!iB^zxVfl=jJc`^{$khDn_APlHkVh2A&V|{is1LUjo9UH>MAUH z@L4V0u>FzcA^h=_aYT{c?7jpR+;2vXLhcVqPZu{nkEf+tzf?^N9}{Z;4PvHWxZDZ< zp7mqg44%nFro_rk-Ju#PfyPx#XvCU|ROz4xJ}RK5KKSmhRA!9&o69@d`YKL!Tu{xG zYlZIT!dqzZ2hs-T$q|gfIy1(VRS6Kf?Zl`HYM|jhO_Hs%EQqakh_s42Ac$rD*Q~Mu3{`qsBkC`ut{<*UF~CkuuDkNoYVOGs&2mS z5%kuD;0t;C7R48z7XNjZH{3%l5OJVi zx$;1Dj$%AW8L@Ib7Tc_-bYTw^(=}Sga=l1+JI={(u0d$)b>e(Z&PxYa^L7bWR_EwPo*QB zbOZ%Ac!sdsx|s^fiYM)noueEY?xrIbeF2iff0RTGR-K}z5P8{Xj~xi;?sD-p>bLgZ zfmWy&vK9;4PJm`ulBEIxckqBz4IX4Ny%uDjN9)8wtyo*x{~P_jeCeEXtbJ1WBs6nX zu%Pa89D{Gn1^Aonjc`Js9z`t`smPv$RkH^Rpg~}crD4@9(mrP#Pr>z`K^mE_V$-Or zPy>wLvPJ~GLyO(|#aBlZOYvNUy;QIIP?`iKzRTW0B7;A*{9C!i}P6KUeP6 zAhuJvSCoz3mG>hK&^k;X3)dY8i^nzH5V#6`!|Q9EltltX*u5vx&^-J_CY#!h=|GzZ zg5r`?o#0*5(b7C+nbVB~8mRAjRozlA-teRGE6YT%9Zr!FcGu1p1dTQH9Cw||Am~f@ zg2233e0V^zk2s}5K$a=eB#Zi>%qyU*B_|4)O6(0>tyPC*>8v^uvhTwMgLN9nUX>Ep z*oP44r`J@PPyw8Wu>)dg-4(^k z0FO5oZrid1yQM|?emT3Q26C$ZTs*BhUH)0?M*Hx5N~LHCEwwx`Z6%@ctJq@1ch(?0 zLK&0=6mGi{(8!O3OCoUW<$@6Od>_*>=n~Mx3~Y?NvPyd)cR@;Qg>KJUYDC=dUZ^>? 
z)q9p}z++Eo^O$&Se!2ydt4+1n4jhY0|y4~zla6KN=R$;1TMxL`NSR{3+}Kp=}HS^SQVNljO$=cibpvXrzGU9TItun5~KRW}k-n)F%0Op9a}O8bd#S6o}) zRo2^fFw3TeQynIIy=$GlQ9Ip3RaB0{tT6lkU-_3u7XN?#=PrEWuk+XL&kh1R2>f9| z;9nbmTG5xE{7avrquO9Zw6B{^2Hi5v^7eQz7!|#&P5ZxckaW_tNc*D+KInqZf0)<9 z;iznPlF=kBJ89A%4t8G8gh9B8g zupeEsHw%dQ;aI%n0Gmcyt3*7F$PQQLmO11^o3RD^`q5NXin`5%3N5n><)*C*7L@v^ zVd@(gEQS-iLkc%maE+U_cwq4Km05EKMd>q^&k7SmtdWQ z>N@?-xP*Bo?LjdbWktK+g-=cfG*Zp7oVa$Gk$TZd+Wgi|OK@^n6zP~aV1JzLSm%y) zen+emkC%B_@YoQ-GyWNpsWCx~Myz~?By-B`v62*?r~}*Bv6YV&>*bu>wiuU5itryL z1gR&3bddMPlkspop_N&8m}cc*+$p=|aMDiE491fVJ%W1!qSM5xC&Q#O%95m)WaT(3 zd!1e{o%GUnQVhH37wM$v^t17Jn2blmcE8(FsvXlc?E}i2!_p7N$Nc9a@u6z;i zKKUZ67!SaS<#F^CtwD%^URv^2m=hu7 z+8Mr+s=pp?x<>6IQ>zn(Z2Djvuah$+?eE6iBk3O&bhMS+4PL+dev7CApxnd5mFS?K0-W3z$KK`7d7#S})b)yGHSUb6 zZ$mJbcoiF=F<`vDTkUAL$6H9yY;m{6Cy8gI>4S9q-|)=_7r5M2s;rSSDY_8t-0;kV zelmQeu&}vixH^ai6+ZSKDVs%QPze)@Ty7Kwv)1mrq1cq<_r^0;CYPeoDuBwaM}&BETo95_}_w&w1tW| z*y&VzylvHTaibg`;WH1j?J54k%pS=^o0p&1*61`M$x}sI!wL-rw;h_@iL2<&sSTPZ zc^eg<4yTCMoLXo8p`ja6TXdjAFN6&XG&PsfeUzsf4DoJl25&$lgm|rW#P&%& zP$XptCah4}`pKT(MH!J0UZKeUcLR!mzRg2MXaQ|w=yzj}FsiKmO6VGcZh9+?)%gUX z_unwxG{sXvmHATV(dy;^&)CXS@q)15O2Km1AU`C)WCB{$u-RsyNv0>zy9R2F;FfL$oT^?o zLaqDtou}b@6f`o#Vmc1=WQhpK^^F4I5cd=IY$A@c(MAePB{l;uA>S=@uyn<%`d%tU zq?{_Rtx~`Oc}ih0SYMBg00iprr9h-jS(5<$BCfuvvothRMeDGPly6AeJM|Wh@5jwI zdbt9SkkJ;n>-8!aZ&Ti~CYWOBT91FJ;R4xL8xv`5SyfOr!jdY)^@aFO=xZWe!{<;< z6=R?wvF2V_B1r?f&^#{YDI|5qNmqLn>BLlls_h!=O1H@Aj*FnxiR_J!+f9+!h1T-O zP?E@d^Lj7>(OtDQh$pFI79jb~ayT~Q?>Ii~3c|B6GO~u#fVOe$T$-0+ zH}w1_nn4hLDfTTA5v`4Z3rYoWmTu0anHqP=lKO^yBuKhktE-(( z*;3Gq57tRUm5~z&Aaq)Wy6Sc_OmHsRZz5gj1=B#tO{>pxl6*rfUbALYbK{B5>ZH3G^!U)Rp(GVgM)$oD`*)z+Rw@``G1_Bin5dzqUmW5rAtcl> zoT-JG%cZBs|G)j*e{}K77XN?YzJH33uC5K(I60%-OFfQqL zy5nxz?sYocsg?KJT;w_E4?6A6U^MEE_%=-mOScC-0@LGcYjBMepY{(z{iqjDaCFtF zQ8T069*x`WCjt5i|ldY3N~^8KwMNkgJHk*?t3mCb@p zD>7J7$fZiMa(O%@ma7uZs?rpliqKS5S9*iU64qw*wPJTu=2ne^mt%&yOQJ7T(WO;f zN_R9jfjXYkBM=*WO8O1s%b%^@~NDx{}r%Oc@Rpw`xX9(uXIniZwRugC+#Nyp%+D8DI1qIQ@Ak>!T#Z7qW)U9|cX*C23* zO}XGW&3A8}zKz%!T7F90OXjdyZrYW~i|r`A?%0EDem)$H8x)$RiE_4kfyk3*6o$j^ zi-!SE07WNwNCFtd#|#wbY|r)W=lnRW(bWbH2c+sWi}%jBC?@b;P3aA4q7 zay%Zr(;kfIF>oT1ckh_817-DBH+0~3bML!zqsjp+Hd Date: Wed, 2 Oct 2024 18:43:43 +0000 Subject: [PATCH 79/80] remove comment --- validators/weight_setter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/validators/weight_setter.py b/validators/weight_setter.py index f63a313a..22e53cfb 100644 --- a/validators/weight_setter.py +++ b/validators/weight_setter.py @@ -494,7 +494,7 @@ async def consume_organic_queries(self): forward_fn=self.embeddings, blacklist_fn=self.blacklist_embeddings, ) - # self.axon.serve(netuid=self.netuid, subtensor=self.subtensor) + self.axon.serve(netuid=self.netuid, subtensor=self.subtensor) print(f"axon: {self.axon}") self.axon.start() bt.logging.info(f"Running validator on uid: {self.my_uid}") From cb3ffb613e034c94049234a4c63957dd22bf038c Mon Sep 17 00:00:00 2001 From: acer-king Date: Wed, 2 Oct 2024 12:35:38 -0700 Subject: [PATCH 80/80] add cache.db to gitignore --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 9cca6664..cb1ad171 100644 --- a/.gitignore +++ b/.gitignore @@ -16,4 +16,5 @@ validators/.ipynb_checkpoints/ **/Cortex.t.egg-info **/test.ipynb .env -**/server/**/*.py \ No newline at end of file +**/server/**/*.py +cache.db \ No newline at end of file