API Calling: Web APIs

BLAST

Module for handling BLAST API interactions.

Provides functionality for building queries, fetching results, and interpreting BLAST (Basic Local Alignment Search Tool) sequence alignment data.

`BlastFetcher`

Bases: BaseFetcher

A class for retrieving API results from BLAST.

Retrieves results from BLAST given a parameterised BlastQuery.

TODO add a limit of characters to be returned from the response.text?

Source code in biochatter/api_agent/web/blast.py

class BlastFetcher(BaseFetcher):
    """A class for retrieving API results from BLAST.

    Retrieves results from BLAST given a parameterised BlastQuery. Retrieval
    is a two-step procedure: the query is submitted to obtain a Request ID
    (RID), and the RID is then polled until the results can be downloaded.

    TODO add a limit of characters to be returned from the response.text?
    """

    def _submit_query(self, request_data: BlastQueryParameters) -> str:
        """POST the BLAST query and retrieve the RID.

        The method submits the structured BlastQuery object and returns the RID.
        As a side effect, the URL equivalent of the submission is stored on
        ``request_data.full_url`` and printed.

        Args:
        ----
            request_data: BlastQuery object containing the BLAST query
                parameters.

        Returns:
        -------
            str: The Request ID (RID) for the submitted BLAST query.

        Raises:
        ------
            requests.HTTPError: If the submission returns an error status.
            ValueError: If no RID can be found in the submission response.

        """
        data = {
            "CMD": request_data.cmd,
            "PROGRAM": request_data.program,
            "DATABASE": request_data.database,
            "QUERY": request_data.query,
            "FORMAT_TYPE": request_data.format_type,
            "MEGABLAST": request_data.megablast,
            "HITLIST_SIZE": request_data.max_hits,
        }
        # Include any other_params if provided
        if request_data.other_params:
            data.update(request_data.other_params)
        # Drop unset fields so that the logged URL does not contain the literal
        # text "None"; requests skips None values when encoding the POST body,
        # so the submitted form is unaffected.
        data = {key: value for key, value in data.items() if value is not None}
        # The default URL already ends in "?"; strip it so the logged URL does
        # not contain "??".
        base_url = str(request_data.url).rstrip("?")
        request_data.full_url = f"{base_url}?{urlencode(data)}"
        print("Full URL built by retriever:\n", request_data.full_url)
        # Make the API call
        response = requests.post(request_data.url, data=data, timeout=10)
        response.raise_for_status()
        # Extract RID from response
        print(response)
        match = re.search(r"RID = (\w+)", response.text)
        if match:
            return match.group(1)

        msg = "RID not found in BLAST submission response."
        raise ValueError(msg)

    def _fetch_results(
        self,
        rid: str,
        question_uuid: str,
        retries: int = 10000,
    ) -> str:
        """Fetch BLAST query data given RID.

        The second function to be called for a BLAST query. Polls the job
        status up to ``retries`` times, waiting 15 seconds between polls, and
        downloads the XML results once the job reports READY.

        Args:
        ----
            rid: The Request ID returned by the submission step.
            question_uuid: Identifier of the question, used in progress
                messages only.
            retries: Maximum number of status polls.

        Returns:
        -------
            str: The XML results, or "No hits found" if the finished job
                reports no hits.

        Raises:
        ------
            RuntimeError: If the job failed, expired, or does not exist.
            TimeoutError: If the results are not ready after ``retries`` polls.
            requests.HTTPError: If a request returns an error status.

        """
        base_url = "https://blast.ncbi.nlm.nih.gov/Blast.cgi"
        check_status_params = {
            "CMD": "Get",
            "FORMAT_OBJECT": "SearchInfo",
            "RID": rid,
        }
        get_results_params = {
            "CMD": "Get",
            "FORMAT_TYPE": "XML",
            "RID": rid,
        }

        # Check the status of the BLAST job
        for attempt in range(retries):
            status_response = requests.get(base_url, params=check_status_params, timeout=10)
            status_response.raise_for_status()
            status_text = status_response.text
            print("evaluating status")
            if "Status=WAITING" in status_text:
                print(f"{question_uuid} results not ready, waiting...")
            elif "Status=FAILED" in status_text:
                msg = "BLAST query FAILED."
                raise RuntimeError(msg)
            elif "Status=UNKNOWN" in status_text:
                msg = "BLAST query expired or does not exist."
                raise RuntimeError(msg)
            elif "Status=READY" in status_text:
                if "ThereAreHits=yes" not in status_text:
                    return "No hits found"
                print(f"{question_uuid} results are ready, retrieving.")
                results_response = requests.get(
                    base_url,
                    params=get_results_params,
                    timeout=10,
                )
                results_response.raise_for_status()
                return results_response.text
            # Wait before polling again, whether the job is still WAITING or
            # the status was not recognised. Skip the wait after the final
            # attempt, since no further poll follows.
            if attempt < retries - 1:
                time.sleep(15)

        # Also reached when retries <= 0, so the method never returns None.
        msg = "Maximum attempts reached. Results may not be ready."
        raise TimeoutError(msg)

    def fetch_results(
        self,
        query_models: list[BlastQueryParameters],
        retries: int = 20,
    ) -> str:
        """Submit request and fetch results from BLAST API.

        Wraps individual submission and retrieval of results.

        Args:
        ----
            query_models: list of Pydantic models of the queries; only the
                first element is used for now.
            retries: the number of maximum retries

        Returns:
        -------
            str: the result from the BLAST API

        """
        # For now, we only use the first query in the list
        query = query_models[0]
        rid = self._submit_query(request_data=query)
        return self._fetch_results(
            rid=rid,
            question_uuid=query.question_uuid,
            retries=retries,
        )

`_fetch_results(rid, question_uuid, retries=10000)`

Fetch BLAST query data given RID.

The second function to be called for a BLAST query.

Source code in biochatter/api_agent/web/blast.py

def _fetch_results(
    self,
    rid: str,
    question_uuid: str,
    retries: int = 10000,
) -> str:
    """Fetch BLAST query data given RID.

    The second function to be called for a BLAST query.
    """
    base_url = "https://blast.ncbi.nlm.nih.gov/Blast.cgi"
    check_status_params = {
        "CMD": "Get",
        "FORMAT_OBJECT": "SearchInfo",
        "RID": rid,
    }
    get_results_params = {
        "CMD": "Get",
        "FORMAT_TYPE": "XML",
        "RID": rid,
    }

    # Check the status of the BLAST job
    for attempt in range(retries):
        status_response = requests.get(base_url, params=check_status_params, timeout=10)
        status_response.raise_for_status()
        status_text = status_response.text
        print("evaluating status")
        if "Status=WAITING" in status_text:
            print(f"{question_uuid} results not ready, waiting...")
            time.sleep(15)
        elif "Status=FAILED" in status_text:
            msg = "BLAST query FAILED."
            raise RuntimeError(msg)
        elif "Status=UNKNOWN" in status_text:
            msg = "BLAST query expired or does not exist."
            raise RuntimeError(msg)
        elif "Status=READY" in status_text:
            if "ThereAreHits=yes" in status_text:
                print(f"{question_uuid} results are ready, retrieving.")
                results_response = requests.get(
                    base_url,
                    params=get_results_params,
                    timeout=10,
                )
                results_response.raise_for_status()
                return results_response.text
            return "No hits found"
        if attempt == retries - 1:
            msg = "Maximum attempts reached. Results may not be ready."
            raise TimeoutError(msg)
    return None

`_submit_query(request_data)`

POST the BLAST query and retrieve the RID.

The method submits the structured BlastQuery object and returns the RID.

request_data: BlastQuery object containing the BLAST query
    parameters.

str: The Request ID (RID) for the submitted BLAST query.

Source code in biochatter/api_agent/web/blast.py

def _submit_query(self, request_data: BlastQueryParameters) -> str:
    """POST the BLAST query and retrieve the RID.

    The method submits the structured BlastQuery object and returns the RID.
    As a side effect, the URL equivalent of the submission is stored on
    ``request_data.full_url`` and printed.

    Args:
    ----
        request_data: BlastQuery object containing the BLAST query
            parameters.

    Returns:
    -------
        str: The Request ID (RID) for the submitted BLAST query.

    Raises:
    ------
        requests.HTTPError: If the submission returns an error status.
        ValueError: If no RID can be found in the submission response.

    """
    data = {
        "CMD": request_data.cmd,
        "PROGRAM": request_data.program,
        "DATABASE": request_data.database,
        "QUERY": request_data.query,
        "FORMAT_TYPE": request_data.format_type,
        "MEGABLAST": request_data.megablast,
        "HITLIST_SIZE": request_data.max_hits,
    }
    # Include any other_params if provided
    if request_data.other_params:
        data.update(request_data.other_params)
    # Drop unset fields so that the logged URL does not contain the literal
    # text "None"; requests skips None values when encoding the POST body, so
    # the submitted form is unaffected.
    data = {key: value for key, value in data.items() if value is not None}
    # The default URL already ends in "?"; strip it so the logged URL does not
    # contain "??".
    base_url = str(request_data.url).rstrip("?")
    request_data.full_url = f"{base_url}?{urlencode(data)}"
    print("Full URL built by retriever:\n", request_data.full_url)
    # Make the API call
    response = requests.post(request_data.url, data=data, timeout=10)
    response.raise_for_status()
    # Extract RID from response
    print(response)
    match = re.search(r"RID = (\w+)", response.text)
    if match:
        return match.group(1)

    msg = "RID not found in BLAST submission response."
    raise ValueError(msg)

`fetch_results(query_models, retries=20)`

Submit request and fetch results from BLAST API.

Wraps individual submission and retrieval of results.

query_models: list of Pydantic models of the queries
retries: the number of maximum retries

str: the result from the BLAST API

Source code in biochatter/api_agent/web/blast.py

def fetch_results(
    self,
    query_models: list[BlastQueryParameters],
    retries: int = 20,
) -> str:
    """Run a complete BLAST round trip for the first query in the list.

    Submits the query to obtain its Request ID and then polls for the
    results belonging to that ID.

    Args:
    ----
        query_models: Pydantic models describing the queries; only the first
            element is used at the moment.
        retries: upper bound on the number of polling attempts

    Returns:
    -------
        str: the text returned by the BLAST API for this query

    """
    # Only the first query is processed for now
    first_query = query_models[0]
    request_id = self._submit_query(request_data=first_query)
    blast_output = self._fetch_results(
        rid=request_id,
        question_uuid=first_query.question_uuid,
        retries=retries,
    )
    return blast_output

`BlastInterpreter`

Bases: BaseInterpreter

A class for interpreting BLAST results.

Source code in biochatter/api_agent/web/blast.py

class BlastInterpreter(BaseInterpreter):
    """A class for interpreting BLAST results."""

    def summarise_results(
        self,
        question: str,
        conversation_factory: Callable,
        response_text: str,
    ) -> str:
        """Extract the answer from the BLAST results.

        Args:
        ----
            question (str): The question to be answered.
            conversation_factory: A callable that returns a BioChatter
                conversation object; its ``chat`` attribute is used as the
                model in the summarisation chain.
            response_text (str): The response.text returned by NCBI.

        Returns:
        -------
            str: The extracted answer from the BLAST results.

        """
        prompt = ChatPromptTemplate.from_messages(
            [
                (
                    "system",
                    "You are a world class molecular biologist who knows everything about NCBI and BLAST results.",
                ),
                ("user", "{input}"),
            ],
        )
        summary_prompt = BLAST_SUMMARY_PROMPT.format(
            question=question,
            context=response_text,
        )
        output_parser = StrOutputParser()
        conversation = conversation_factory()
        chain = prompt | conversation.chat | output_parser
        # Pass the prompt as a plain string. Wrapping it in braces creates a
        # one-element set, whose repr (extra braces, quotes, escaped newlines)
        # would be substituted into the user message instead of the text.
        return chain.invoke({"input": summary_prompt})

`summarise_results(question, conversation_factory, response_text)`

Extract the answer from the BLAST results.

question (str): The question to be answered.
conversation_factory: A BioChatter conversation object.
response_text (str): The response.text returned by NCBI.

str: The extracted answer from the BLAST results.

Source code in biochatter/api_agent/web/blast.py

def summarise_results(
    self,
    question: str,
    conversation_factory: Callable,
    response_text: str,
) -> str:
    """Extract the answer from the BLAST results.

    Args:
    ----
        question (str): The question to be answered.
        conversation_factory: A callable that returns a BioChatter
            conversation object; its ``chat`` attribute is used as the model
            in the summarisation chain.
        response_text (str): The response.text returned by NCBI.

    Returns:
    -------
        str: The extracted answer from the BLAST results.

    """
    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                "You are a world class molecular biologist who knows everything about NCBI and BLAST results.",
            ),
            ("user", "{input}"),
        ],
    )
    summary_prompt = BLAST_SUMMARY_PROMPT.format(
        question=question,
        context=response_text,
    )
    output_parser = StrOutputParser()
    conversation = conversation_factory()
    chain = prompt | conversation.chat | output_parser
    # Pass the prompt as a plain string. Wrapping it in braces creates a
    # one-element set, whose repr (extra braces, quotes, escaped newlines)
    # would be substituted into the user message instead of the text.
    return chain.invoke({"input": summary_prompt})

`BlastQueryBuilder`

Bases: BaseQueryBuilder

A class for building a BlastQuery object.

Source code in biochatter/api_agent/web/blast.py

class BlastQueryBuilder(BaseQueryBuilder):
    """Builder that turns a natural-language question into a BLAST query."""

    def create_runnable(
        self,
        query_parameters: "BlastQueryParameters",
        conversation: "Conversation",
    ) -> Callable:
        """Build the structured-output runnable used to parameterise queries.

        Delegates to LangChain's `create_structured_output_runnable`, passing
        the conversation's chat model and this builder's
        `structured_output_prompt`.

        Args:
        ----
            query_parameters: The Pydantic data model used as the output
                schema, i.e. the API fields to be filled in.

            conversation: A BioChatter conversation object whose `chat`
                attribute is passed as the LLM.

        Returns:
        -------
            A Callable object that can execute the query.

        """
        runnable_kwargs = {
            "output_schema": query_parameters,
            "llm": conversation.chat,
            "prompt": self.structured_output_prompt,
        }
        return create_structured_output_runnable(**runnable_kwargs)

    def parameterise_query(
        self,
        question: str,
        conversation: "Conversation",
    ) -> list[BlastQueryParameters]:
        """Produce the BLAST query parameters for a question.

        A structured-output runnable based on the `BlastQueryParameters`
        model is created and invoked with the question combined with the
        BLAST query prompt. The resulting object receives a fresh question
        UUID before it is returned.

        Args:
        ----
            question (str): The question to be answered.

            conversation: The conversation object used for parameterising
                the query.

        Returns:
        -------
            list[BlastQueryParameters]: a single-element list holding the
                parameterised query object (Pydantic model)

        """
        structured_runnable = self.create_runnable(
            query_parameters=BlastQueryParameters,
            conversation=conversation,
        )
        llm_input = f"Answer:\n{question} based on:\n {BLAST_QUERY_PROMPT}"
        parameterised = structured_runnable.invoke({"input": llm_input})
        # Tag the query so that later progress messages can refer to it
        parameterised.question_uuid = str(uuid.uuid4())
        return [parameterised]

`create_runnable(query_parameters, conversation)`

Create a runnable object for executing queries.

Creates a runnable using the LangChain create_structured_output_runnable method.

query_parameters: A Pydantic data model that specifies the fields of
    the API that should be queried.

conversation: A BioChatter conversation object.

A Callable object that can execute the query.

Source code in biochatter/api_agent/web/blast.py

def create_runnable(
    self,
    query_parameters: "BlastQueryParameters",
    conversation: "Conversation",
) -> Callable:
    """Build the structured-output runnable used to parameterise queries.

    Delegates to LangChain's `create_structured_output_runnable`, passing the
    conversation's chat model and this builder's `structured_output_prompt`.

    Args:
    ----
        query_parameters: The Pydantic data model used as the output schema,
            i.e. the API fields to be filled in.

        conversation: A BioChatter conversation object whose `chat`
            attribute is passed as the LLM.

    Returns:
    -------
        A Callable object that can execute the query.

    """
    runnable_kwargs = {
        "output_schema": query_parameters,
        "llm": conversation.chat,
        "prompt": self.structured_output_prompt,
    }
    return create_structured_output_runnable(**runnable_kwargs)

`parameterise_query(question, conversation)`

Generate a BlastQuery object.

Generates the object based on the given question, prompt, and BioChatter conversation. Uses a Pydantic model to define the API fields. Creates a runnable that can be invoked on LLMs that are qualified to parameterise functions.

question (str): The question to be answered.

conversation: The conversation object used for parameterising the
    BlastQuery.

list[BlastQueryParameters]: a single-element list containing the parameterised query object (Pydantic model)

Source code in biochatter/api_agent/web/blast.py

def parameterise_query(
    self,
    question: str,
    conversation: "Conversation",
) -> list[BlastQueryParameters]:
    """Produce the BLAST query parameters for a question.

    A structured-output runnable based on the `BlastQueryParameters` model is
    created and invoked with the question combined with the BLAST query
    prompt. The resulting object receives a fresh question UUID before it is
    returned.

    Args:
    ----
        question (str): The question to be answered.

        conversation: The conversation object used for parameterising the
            query.

    Returns:
    -------
        list[BlastQueryParameters]: a single-element list holding the
            parameterised query object (Pydantic model)

    """
    structured_runnable = self.create_runnable(
        query_parameters=BlastQueryParameters,
        conversation=conversation,
    )
    llm_input = f"Answer:\n{question} based on:\n {BLAST_QUERY_PROMPT}"
    parameterised = structured_runnable.invoke({"input": llm_input})
    # Tag the query so that later progress messages can refer to it
    parameterised.question_uuid = str(uuid.uuid4())
    return [parameterised]

`BlastQueryParameters`

Bases: BaseModel

Pydantic model for the parameters of a BLAST query request.

The class is used for configuring and sending a request to the NCBI BLAST query API. The fields are dynamically configured by the LLM based on the user's question.

Source code in biochatter/api_agent/web/blast.py

class BlastQueryParameters(BaseModel):
    """Pydantic model for the parameters of a BLAST query request.

    The class is used for configuring and sending a request to the NCBI BLAST
    query API. The fields are dynamically configured by the LLM based on the
    user's question.

    Every field is optional and carries a default. The field descriptions are
    runtime strings (presumably surfaced to the LLM through the model schema
    -- TODO confirm), so they must not be edited casually.

    """

    # Endpoint that BlastFetcher._submit_query POSTs to.
    # NOTE(review): the default ends in "?", and _submit_query appends another
    # "?" when it builds full_url.
    url: str | None = Field(
        default="https://blast.ncbi.nlm.nih.gov/Blast.cgi?",
        description="ALWAYS USE DEFAULT, DO NOT CHANGE",
    )
    # Sent as the CMD form field.
    cmd: str | None = Field(
        default="Put",
        description="Command to execute, 'Put' for submitting query, 'Get' for retrieving results.",
    )
    # Sent as the PROGRAM form field.
    program: str | None = Field(
        default="blastn",
        description=(
            "BLAST program to use, e.g., 'blastn' for nucleotide-nucleotide BLAST, "
            "'blastp' for protein-protein BLAST."
        ),
    )
    # Sent as the DATABASE form field.
    database: str | None = Field(
        default="nt",
        description=(
            "Database to search, e.g., 'nt' for nucleotide database, 'nr' for "
            "non redundant protein database, 'pdb' the Protein Data Bank "
            "database, which is used specifically for protein structures, "
            "'refseq_rna' and 'refseq_genomic': specialized databases for "
            "RNA sequences and genomic sequences"
        ),
    )
    # Sent as the QUERY form field; has no default sequence.
    query: str | None = Field(
        None,
        description=(
            "Nucleotide or protein sequence for the BLAST or blat query, "
            "make sure to always keep the entire sequence given."
        ),
    )
    # Sent as the FORMAT_TYPE form field of the submission.
    format_type: str | None = Field(
        default="Text",
        description="Format of the BLAST results, e.g., 'Text', 'XML'.",
    )
    # Not part of the payload built by BlastFetcher._submit_query; the fetcher
    # passes the RID around as a separate value.
    rid: str | None = Field(
        None,
        description="Request ID for retrieving BLAST results.",
    )
    # Merged into the submission payload by _submit_query when non-empty.
    other_params: dict | None = Field(
        default={"email": "user@example.com"},
        description="Other optional BLAST parameters, including user email.",
    )
    # Sent as the HITLIST_SIZE form field.
    max_hits: int | None = Field(
        default=15,
        description="Maximum number of hits to return in the BLAST results.",
    )
    # Not included in the payload built by BlastFetcher._submit_query.
    sort_by: str | None = Field(
        default="score",
        description="Criterion to sort BLAST results by, e.g., 'score', 'evalue'.",
    )
    # Sent as the MEGABLAST form field.
    # NOTE(review): the description below contains the typo "alignemnts"; it
    # is a runtime string and is left untouched here.
    megablast: str | None = Field(
        default="on",
        description="Set to 'on' for human genome alignemnts",
    )
    # A fresh UUID4 string per instance; BlastQueryBuilder.parameterise_query
    # assigns a new one after the LLM has produced the object.
    question_uuid: str | None = Field(
        default_factory=lambda: str(uuid.uuid4()),
        description="Unique identifier for the question.",
    )
    # Placeholder value until BlastFetcher._submit_query overwrites it with
    # the URL it built.
    full_url: str | None = Field(
        default="TBF",
        description="Full URL to be used to submit the BLAST query",
    )

OncoKB

OncoKB API agent.

`OncoKBFetcher`

Bases: BaseFetcher

A class for retrieving API results.

Retrieve from OncoKB given a parameterized OncoKBQuery.

Source code in biochatter/api_agent/web/oncokb.py

class OncoKBFetcher(BaseFetcher):
    """A class for retrieving API results.

    Retrieve from OncoKB given a parameterized OncoKBQuery.
    """

    def __init__(self, api_token: str = "demo") -> None:  # noqa: S107
        """Initialise the OncoKBFetcher.

        Args:
        ----
            api_token: The API token sent as a Bearer token with each request.

        """
        self.headers = {
            "Authorization": f"Bearer {api_token}",
            "Accept": "application/json",
        }
        self.base_url = "https://demo.oncokb.org/api/v1"

    def fetch_results(
        self,
        request_data: list[OncoKBQueryParameters],
        retries: int | None = 3,  # noqa: ARG002
    ) -> str:
        """Submit the OncoKB query and fetch the results directly.

        No multi-step procedure, thus no wrapping of submission and retrieval in
        this case.

        Args:
        ----
            request_data: List of OncoKBQuery objects (Pydantic models)
                containing the OncoKB query parameters. Only the first
                element is used for now.

            retries: The number of retries to fetch the results. Currently
                unused; kept for interface compatibility.

        Returns:
        -------
            str: The results of the OncoKB query.

        Raises:
        ------
            requests.HTTPError: If the API returns an error status.

        """
        # For now, we only use the first query in the list
        query = request_data[0]

        # Only explicitly set fields become URL parameters
        params = query.dict(exclude_unset=True)
        endpoint = params.pop("endpoint")
        # question_uuid is bookkeeping, not an API parameter; it may be unset
        params.pop("question_uuid", None)
        full_url = f"{self.base_url}/{endpoint}"

        # A single GET already returns the results; requesting response.url
        # again would only repeat the same call. The timeout keeps a stalled
        # connection from blocking forever.
        response = requests.get(full_url, headers=self.headers, params=params, timeout=30)
        response.raise_for_status()

        return response.text

`fetch_results(request_data, retries=3)`

Submit the OncoKB query and fetch the results directly.

No multi-step procedure, thus no wrapping of submission and retrieval in this case.

request_data: List of OncoKBQuery objects (Pydantic models)
    containing the OncoKB query parameters.

retries: The number of retries to fetch the results.

str: The results of the OncoKB query.

Source code in biochatter/api_agent/web/oncokb.py

def fetch_results(
    self,
    request_data: list[OncoKBQueryParameters],
    retries: int | None = 3,  # noqa: ARG002
) -> str:
    """Submit the OncoKB query and fetch the results directly.

    No multi-step procedure, thus no wrapping of submission and retrieval in
    this case.

    Args:
    ----
        request_data: List of OncoKBQuery objects (Pydantic models)
            containing the OncoKB query parameters. Only the first element
            is used for now.

        retries: The number of retries to fetch the results. Currently
            unused; kept for interface compatibility.

    Returns:
    -------
        str: The results of the OncoKB query.

    Raises:
    ------
        requests.HTTPError: If the API returns an error status.

    """
    # For now, we only use the first query in the list
    query = request_data[0]

    # Only explicitly set fields become URL parameters
    params = query.dict(exclude_unset=True)
    endpoint = params.pop("endpoint")
    # question_uuid is bookkeeping, not an API parameter; it may be unset
    params.pop("question_uuid", None)
    full_url = f"{self.base_url}/{endpoint}"

    # A single GET already returns the results; requesting response.url again
    # would only repeat the same call. The timeout keeps a stalled connection
    # from blocking forever.
    response = requests.get(full_url, headers=self.headers, params=params, timeout=30)
    response.raise_for_status()

    return response.text

`OncoKBInterpreter`

Bases: BaseInterpreter

Source code in biochatter/api_agent/web/oncokb.py

class OncoKBInterpreter(BaseInterpreter):
    """A class for interpreting OncoKB results."""

    def summarise_results(
        self,
        question: str,
        conversation_factory: Callable,
        response_text: str,
    ) -> str:
        """Extract the answer from the OncoKB results.

        Args:
        ----
            question (str): The question to be answered.
            conversation_factory: A callable that returns a BioChatter
                conversation object; its ``chat`` attribute is used as the
                model in the summarisation chain.
            response_text (str): The response.text returned by OncoKB.

        Returns:
        -------
            str: The extracted answer from the OncoKB results.

        """
        prompt = ChatPromptTemplate.from_messages(
            [
                (
                    "system",
                    "You are a world class molecular biologist who knows "
                    "everything about OncoKB and cancer genomics. Your task is "
                    "to interpret results from OncoKB API calls and summarise "
                    "them for the user.",
                ),
                ("user", "{input}"),
            ],
        )
        summary_prompt = ONCOKB_SUMMARY_PROMPT.format(
            question=question,
            context=response_text,
        )
        output_parser = StrOutputParser()
        conversation = conversation_factory()
        chain = prompt | conversation.chat | output_parser
        # Pass the prompt as a plain string. Wrapping it in braces creates a
        # one-element set, whose repr (extra braces, quotes, escaped newlines)
        # would be substituted into the user message instead of the text.
        return chain.invoke({"input": summary_prompt})

`summarise_results(question, conversation_factory, response_text)`

Extract the answer from the OncoKB results.

question (str): The question to be answered.
conversation_factory: A BioChatter conversation object.
response_text (str): The response.text returned by OncoKB.

str: The extracted answer from the OncoKB results.

Source code in biochatter/api_agent/web/oncokb.py

def summarise_results(
    self,
    question: str,
    conversation_factory: Callable,
    response_text: str,
) -> str:
    """Extract the answer from the OncoKB results.

    Args:
    ----
        question (str): The question to be answered.
        conversation_factory: A callable that returns a BioChatter
            conversation object; its ``chat`` attribute is used as the model
            in the summarisation chain.
        response_text (str): The response.text returned by OncoKB.

    Returns:
    -------
        str: The extracted answer from the OncoKB results.

    """
    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                "You are a world class molecular biologist who knows "
                "everything about OncoKB and cancer genomics. Your task is "
                "to interpret results from OncoKB API calls and summarise "
                "them for the user.",
            ),
            ("user", "{input}"),
        ],
    )
    summary_prompt = ONCOKB_SUMMARY_PROMPT.format(
        question=question,
        context=response_text,
    )
    output_parser = StrOutputParser()
    conversation = conversation_factory()
    chain = prompt | conversation.chat | output_parser
    # Pass the prompt as a plain string. Wrapping it in braces creates a
    # one-element set, whose repr (extra braces, quotes, escaped newlines)
    # would be substituted into the user message instead of the text.
    return chain.invoke({"input": summary_prompt})

`OncoKBQueryBuilder`

Bases: BaseQueryBuilder

A class for building an OncoKBQuery object.

Source code in biochatter/api_agent/web/oncokb.py

class OncoKBQueryBuilder(BaseQueryBuilder):
    """Builder that turns a natural-language question into an OncoKB query."""

    def create_runnable(
        self,
        query_parameters: "OncoKBQueryParameters",
        conversation: "Conversation",
    ) -> Callable:
        """Build the structured-output runnable used to parameterise queries.

        Delegates to LangChain's `create_structured_output_runnable`, passing
        the conversation's chat model and this builder's
        `structured_output_prompt`.

        Args:
        ----
            query_parameters: The Pydantic data model used as the output
                schema, i.e. the API fields to be filled in.

            conversation: A BioChatter conversation object whose `chat`
                attribute is passed as the LLM.

        Returns:
        -------
            A Callable object that can execute the query.

        """
        runnable_kwargs = {
            "output_schema": query_parameters,
            "llm": conversation.chat,
            "prompt": self.structured_output_prompt,
        }
        return create_structured_output_runnable(**runnable_kwargs)

    def parameterise_query(
        self,
        question: str,
        conversation: "Conversation",
    ) -> list[OncoKBQueryParameters]:
        """Produce the OncoKB query parameters for a question.

        A structured-output runnable based on the `OncoKBQueryParameters`
        model is created and invoked with the question combined with the
        OncoKB query prompt. The resulting object receives a fresh question
        UUID before it is returned.

        Args:
        ----
            question (str): The question to be answered.

            conversation: The conversation object used for parameterising
                the query.

        Returns:
        -------
            list[OncoKBQueryParameters]: a single-element list holding the
                parameterised query object (Pydantic model)

        """
        structured_runnable = self.create_runnable(
            query_parameters=OncoKBQueryParameters,
            conversation=conversation,
        )
        llm_input = f"Answer:\n{question} based on:\n {ONCOKB_QUERY_PROMPT}"
        parameterised = structured_runnable.invoke({"input": llm_input})
        # Tag the query with its own identifier
        parameterised.question_uuid = str(uuid.uuid4())
        return [parameterised]

`create_runnable(query_parameters, conversation)`

Creates a runnable object for executing queries using the LangChain create_structured_output_runnable method.

query_parameters: A Pydantic data model that specifies the fields of
    the API that should be queried.

conversation: A BioChatter conversation object.

A Callable object that can execute the query.

Source code in biochatter/api_agent/web/oncokb.py

def create_runnable(
    self,
    query_parameters: "OncoKBQueryParameters",
    conversation: "Conversation",
) -> Callable:
    """Build the structured-output runnable used to parameterise queries.

    Delegates to LangChain's `create_structured_output_runnable`, passing the
    conversation's chat model and this builder's `structured_output_prompt`.

    Args:
    ----
        query_parameters: The Pydantic data model used as the output schema,
            i.e. the API fields to be filled in.

        conversation: A BioChatter conversation object whose `chat`
            attribute is passed as the LLM.

    Returns:
    -------
        A Callable object that can execute the query.

    """
    runnable_kwargs = {
        "output_schema": query_parameters,
        "llm": conversation.chat,
        "prompt": self.structured_output_prompt,
    }
    return create_structured_output_runnable(**runnable_kwargs)

`parameterise_query(question, conversation)`

Generate an OncoKBQuery object.

Generate based on the given question, prompt, and BioChatter conversation. Uses a Pydantic model to define the API fields. Creates a runnable that can be invoked on LLMs that are qualified to parameterise functions.

question (str): The question to be answered.

conversation: The conversation object used for parameterising the
    OncoKBQuery.

list[OncoKBQueryParameters]: a single-element list containing the parameterised query object (Pydantic model)

Source code in biochatter/api_agent/web/oncokb.py

def parameterise_query(
    self,
    question: str,
    conversation: "Conversation",
) -> list[OncoKBQueryParameters]:
    """Produce the OncoKB query parameters for a question.

    A structured-output runnable based on the `OncoKBQueryParameters` model
    is created and invoked with the question combined with the OncoKB query
    prompt. The resulting object receives a fresh question UUID before it is
    returned.

    Args:
    ----
        question (str): The question to be answered.

        conversation: The conversation object used for parameterising the
            query.

    Returns:
    -------
        list[OncoKBQueryParameters]: a single-element list holding the
            parameterised query object (Pydantic model)

    """
    structured_runnable = self.create_runnable(
        query_parameters=OncoKBQueryParameters,
        conversation=conversation,
    )
    llm_input = f"Answer:\n{question} based on:\n {ONCOKB_QUERY_PROMPT}"
    parameterised = structured_runnable.invoke({"input": llm_input})
    # Tag the query with its own identifier
    parameterised.question_uuid = str(uuid.uuid4())
    return [parameterised]

bio.tools

Module for interacting with the bio.tools API.

`BioToolsFetcher`

Bases: BaseFetcher

A class for retrieving API results from BioTools.

Retrieves API results given a parameterized BioToolsQuery.

Source code in biochatter/api_agent/web/bio_tools.py

class BioToolsFetcher(BaseFetcher):
    """A class for retrieving API results from BioTools.

    Retrieves API results given a parameterized BioToolsQuery. Requests are
    sent below ``https://bio.tools/api`` with a bearer-token ``Authorization``
    header and an ``Accept: application/json`` header.
    """

    def __init__(self, api_token: str = "demo") -> None:  # noqa: S107
        """Initialise the BioToolsFetcher.

        Args:
        ----
            api_token: The API token for the BioTools API.

        """
        self.headers = {
            "Authorization": f"Bearer {api_token}",
            "Accept": "application/json",
        }
        self.base_url = "https://bio.tools/api"

    def fetch_results(
        self,
        request_data: list[BioToolsQueryParameters],
        retries: int | None = 3,  # noqa: ARG002
    ) -> str:
        """Submit the BioTools query and fetch the results directly.

        No multi-step procedure, thus no wrapping of submission and retrieval in
        this case: a single GET request returns the results.

        Args:
        ----
            request_data: List of BioToolsQuery objects (Pydantic models)
                containing the BioTools query parameters. Only the first
                element is used.

            retries: The number of retries to fetch the results. Currently
                unused.

        Returns:
        -------
            str: The results of the BioTools query (the response body text).

        Raises:
        ------
            IndexError: If ``request_data`` is empty.
            requests.HTTPError: If the API answers with an error status.

        """
        # For now, we only use the first query in the list
        query = request_data[0]

        # Only explicitly set fields become search parameters.
        params = query.dict(exclude_unset=True)
        endpoint = params.pop("endpoint")
        # Bookkeeping fields are not bio.tools search parameters. Because of
        # `exclude_unset=True` they may be absent, so pop them tolerantly
        # instead of raising KeyError.
        params.pop("question_uuid", None)
        params.pop("base_url", None)

        full_url = f"{self.base_url}/{endpoint}"
        response = requests.get(full_url, headers=self.headers, params=params, timeout=30)
        response.raise_for_status()

        # The response already carries the results; requesting `response.url`
        # a second time would only repeat the same call.
        return response.text

`__init__(api_token='demo')`

Initialise the BioToolsFetcher.

api_token: The API token for the BioTools API.

Source code in biochatter/api_agent/web/bio_tools.py

def __init__(self, api_token: str = "demo") -> None:  # noqa: S107
    """Set up the request headers and base URL used for bio.tools calls.

    Args:
    ----
        api_token: The API token for the BioTools API; it is sent as a
            bearer token in the ``Authorization`` header.

    """
    bearer_credential = f"Bearer {api_token}"
    self.headers = {"Authorization": bearer_credential, "Accept": "application/json"}
    self.base_url = "https://bio.tools/api"

`fetch_results(request_data, retries=3)`

Submit the BioTools query and fetch the results directly.

No multi-step procedure, thus no wrapping of submission and retrieval in this case.

request_data: List of BioToolsQuery objects (Pydantic models)
    containing the BioTools query parameters.

retries: The number of retries to fetch the results.

str: The results of the BioTools query.

Source code in biochatter/api_agent/web/bio_tools.py

def fetch_results(
    self,
    request_data: list[BioToolsQueryParameters],
    retries: int | None = 3,  # noqa: ARG002
) -> str:
    """Submit the BioTools query and fetch the results directly.

    No multi-step procedure, thus no wrapping of submission and retrieval in
    this case: a single GET request returns the results.

    Args:
    ----
        request_data: List of BioToolsQuery objects (Pydantic models)
            containing the BioTools query parameters. Only the first
            element is used.

        retries: The number of retries to fetch the results. Currently
            unused.

    Returns:
    -------
        str: The results of the BioTools query (the response body text).

    Raises:
    ------
        IndexError: If ``request_data`` is empty.
        requests.HTTPError: If the API answers with an error status.

    """
    # For now, we only use the first query in the list
    query = request_data[0]

    # Only explicitly set fields become search parameters.
    params = query.dict(exclude_unset=True)
    endpoint = params.pop("endpoint")
    # Bookkeeping fields are not bio.tools search parameters. Because of
    # `exclude_unset=True` they may be absent, so pop them tolerantly
    # instead of raising KeyError.
    params.pop("question_uuid", None)
    params.pop("base_url", None)

    full_url = f"{self.base_url}/{endpoint}"
    response = requests.get(full_url, headers=self.headers, params=params, timeout=30)
    response.raise_for_status()

    # The response already carries the results; requesting `response.url`
    # a second time would only repeat the same call.
    return response.text

`BioToolsInterpreter`

Bases: BaseInterpreter

A class for interpreting BioTools results.

Source code in biochatter/api_agent/web/bio_tools.py

class BioToolsInterpreter(BaseInterpreter):
    """A class for interpreting BioTools results."""

    def summarise_results(
        self,
        question: str,
        conversation_factory: Callable,
        response_text: str,
    ) -> str:
        """Extract the answer from the bio.tools results.

        Args:
        ----
            question (str): The question to be answered.
            conversation_factory: A callable that, invoked without arguments,
                returns a BioChatter conversation object; its ``chat``
                attribute is used as the model in the chain.
            response_text (str): The response.text returned by bio.tools.

        Returns:
        -------
            str: The extracted answer from the bio.tools results.

        """
        prompt = ChatPromptTemplate.from_messages(
            [
                (
                    "system",
                    "You are a world class bioinformatician who knows "
                    "everything about bio.tools packages and the "
                    "bioinformatics ecosystem. Your task is to interpret "
                    "results from BioTools API calls and summarise "
                    "them for the user.",
                ),
                ("user", "{input}"),
            ],
        )
        summary_prompt = BIOTOOLS_SUMMARY_PROMPT.format(
            question=question,
            context=response_text,
        )
        output_parser = StrOutputParser()
        conversation = conversation_factory()
        chain = prompt | conversation.chat | output_parser
        # Pass the prompt string itself. Wrapping it in braces would build a
        # one-element set whose repr (not the text) ends up in the message.
        return chain.invoke({"input": summary_prompt})

`summarise_results(question, conversation_factory, response_text)`

Extract the answer from the bio.tools results.

question (str): The question to be answered.
conversation_factory: A callable that returns a BioChatter conversation object.
response_text (str): The response.text returned by bio.tools.

str: The extracted answer from the bio.tools results.

Source code in biochatter/api_agent/web/bio_tools.py

def summarise_results(
    self,
    question: str,
    conversation_factory: Callable,
    response_text: str,
) -> str:
    """Extract the answer from the bio.tools results.

    Args:
    ----
        question (str): The question to be answered.
        conversation_factory: A callable that, invoked without arguments,
            returns a BioChatter conversation object; its ``chat``
            attribute is used as the model in the chain.
        response_text (str): The response.text returned by bio.tools.

    Returns:
    -------
        str: The extracted answer from the bio.tools results.

    """
    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                "You are a world class bioinformatician who knows "
                "everything about bio.tools packages and the "
                "bioinformatics ecosystem. Your task is to interpret "
                "results from BioTools API calls and summarise "
                "them for the user.",
            ),
            ("user", "{input}"),
        ],
    )
    summary_prompt = BIOTOOLS_SUMMARY_PROMPT.format(
        question=question,
        context=response_text,
    )
    output_parser = StrOutputParser()
    conversation = conversation_factory()
    chain = prompt | conversation.chat | output_parser
    # Pass the prompt string itself. Wrapping it in braces would build a
    # one-element set whose repr (not the text) ends up in the message.
    return chain.invoke({"input": summary_prompt})

`BioToolsQueryBuilder`

Bases: BaseQueryBuilder

A class for building a BioToolsQuery object.

Source code in biochatter/api_agent/web/bio_tools.py

class BioToolsQueryBuilder(BaseQueryBuilder):
    """A class for building a BioToolsQuery object."""

    def create_runnable(
        self,
        query_parameters: "BioToolsQueryParameters",
        conversation: "Conversation",
    ) -> Callable:
        """Build the structured-output runnable used to parameterise queries.

        Delegates to LangChain's `create_structured_output_runnable`, wiring
        together the output schema, the conversation's chat model and this
        builder's structured output prompt.

        Args:
        ----
            query_parameters: A Pydantic data model that specifies the fields of
                the API that should be queried.

            conversation: A BioChatter conversation object; its ``chat``
                attribute is passed as the LLM.

        Returns:
        -------
            A Callable object that can execute the query.

        """
        chat_model = conversation.chat
        structured_prompt = self.structured_output_prompt
        return create_structured_output_runnable(
            output_schema=query_parameters,
            llm=chat_model,
            prompt=structured_prompt,
        )

    def parameterise_query(
        self,
        question: str,
        conversation: "Conversation",
    ) -> list[BioToolsQueryParameters]:
        """Build a parameterised bio.tools query from a question.

        A runnable for the ``BioToolsQueryParameters`` Pydantic model is
        created and invoked with the question combined with
        ``BIOTOOLS_QUERY_PROMPT``. The object it returns is tagged with a
        freshly generated UUID before being handed back.

        Args:
        ----
            question (str): The question to be answered.

            conversation: The conversation object used for parameterising the
                BioToolsQuery.

        Returns:
        -------
            list[BioToolsQueryParameters]: a single-element list containing
                the parameterised query object (Pydantic model).

        """
        structured_runnable = self.create_runnable(
            query_parameters=BioToolsQueryParameters,
            conversation=conversation,
        )
        llm_input = {
            "input": f"Answer:\n{question} based on:\n {BIOTOOLS_QUERY_PROMPT}",
        }
        biotools_query = structured_runnable.invoke(llm_input)
        # Tag the query so it can be traced back to the question it came from.
        biotools_query.question_uuid = str(uuid.uuid4())
        return [biotools_query]

`create_runnable(query_parameters, conversation)`

Create a runnable object for executing queries.

Create runnable using the LangChain create_structured_output_runnable method.

query_parameters: A Pydantic data model that specifies the fields of
    the API that should be queried.

conversation: A BioChatter conversation object.

A Callable object that can execute the query.

Source code in biochatter/api_agent/web/bio_tools.py

def create_runnable(
    self,
    query_parameters: "BioToolsQueryParameters",
    conversation: "Conversation",
) -> Callable:
    """Build the structured-output runnable used to parameterise queries.

    Delegates to LangChain's `create_structured_output_runnable`, wiring
    together the output schema, the conversation's chat model and this
    builder's structured output prompt.

    Args:
    ----
        query_parameters: A Pydantic data model that specifies the fields of
            the API that should be queried.

        conversation: A BioChatter conversation object; its ``chat``
            attribute is passed as the LLM.

    Returns:
    -------
        A Callable object that can execute the query.

    """
    chat_model = conversation.chat
    structured_prompt = self.structured_output_prompt
    return create_structured_output_runnable(
        output_schema=query_parameters,
        llm=chat_model,
        prompt=structured_prompt,
    )

`parameterise_query(question, conversation)`

Generate a BioToolsQuery object.

Generate a BioToolsQuery object based on the given question, prompt, and BioChatter conversation. Uses a Pydantic model to define the API fields. Creates a runnable that can be invoked on LLMs that are qualified to parameterise functions.

question (str): The question to be answered.

conversation: The conversation object used for parameterising the
    BioToolsQuery.

list[BioToolsQueryParameters]: a single-element list containing the
    parameterised query object (Pydantic model)

Source code in biochatter/api_agent/web/bio_tools.py

def parameterise_query(
    self,
    question: str,
    conversation: "Conversation",
) -> list[BioToolsQueryParameters]:
    """Build a parameterised bio.tools query from a question.

    A runnable for the ``BioToolsQueryParameters`` Pydantic model is
    created and invoked with the question combined with
    ``BIOTOOLS_QUERY_PROMPT``. The object it returns is tagged with a
    freshly generated UUID before being handed back.

    Args:
    ----
        question (str): The question to be answered.

        conversation: The conversation object used for parameterising the
            BioToolsQuery.

    Returns:
    -------
        list[BioToolsQueryParameters]: a single-element list containing
            the parameterised query object (Pydantic model).

    """
    structured_runnable = self.create_runnable(
        query_parameters=BioToolsQueryParameters,
        conversation=conversation,
    )
    llm_input = {
        "input": f"Answer:\n{question} based on:\n {BIOTOOLS_QUERY_PROMPT}",
    }
    biotools_query = structured_runnable.invoke(llm_input)
    # Tag the query so it can be traced back to the question it came from.
    biotools_query.question_uuid = str(uuid.uuid4())
    return [biotools_query]

`BioToolsQueryParameters`

Bases: BaseModel

Parameters for querying the bio.tools API.

Source code in biochatter/api_agent/web/bio_tools.py

class BioToolsQueryParameters(BaseModel):
    """Parameters for querying the bio.tools API.

    Apart from the required ``endpoint``, every search field is optional and
    defaults to ``None``. ``question_uuid`` gets a generated UUID string when
    not provided.
    """

    # --- Request target ---
    base_url: str = Field(
        default="https://bio.tools/api/",
        description="Base URL for the BioTools API.",
    )
    endpoint: str = Field(
        ...,
        description="Specific API endpoint to hit. Example: 't/' for listing tools.",
    )

    # --- General tool attributes ---
    biotoolsID: str | None = Field(  # noqa: N815
        default=None,
        description="Search for bio.tools tool ID (usually quoted - to get exact match)",
    )
    name: str | None = Field(
        default=None,
        description="Search for tool name (quoted as needed: quoted for exact match, unquoted for fuzzy search)",
    )
    homepage: str | None = Field(
        default=None,
        description="Exact search for tool homepage URL (**must** be quoted)",
    )
    description: str | None = Field(
        default=None,
        description="Search over tool description (quoted as needed)",
    )
    version: str | None = Field(
        default=None,
        description="Exact search for tool version (**must** be quoted)",
    )

    # --- EDAM topic and function/operation ---
    topic: str | None = Field(
        default=None,
        description="Search for EDAM Topic (term) (quoted as needed)",
    )
    topicID: str | None = Field(  # noqa: N815
        default=None,
        description="Exact search for EDAM Topic (URI): **must** be quoted",
    )
    function: str | None = Field(
        default=None,
        description="Fuzzy search over function (input, operation, output, note and command)",
    )
    operation: str | None = Field(
        default=None,
        description="Fuzzy search for EDAM Operation (term) (quoted as needed)",
    )
    operationID: str | None = Field(  # noqa: N815
        default=None,
        description="Exact search for EDAM Operation (ID) (**must** be quoted)",
    )

    # --- EDAM data and format over input and output ---
    dataType: str | None = Field(  # noqa: N815
        default=None,
        description="Fuzzy search over input and output for EDAM Data (term) (quoted as needed)",
    )
    dataTypeID: str | None = Field(  # noqa: N815
        default=None,
        description="Exact search over input and output for EDAM Data (ID) (**must** be quoted)",
    )
    dataFormat: str | None = Field(  # noqa: N815
        default=None,
        description="Fuzzy search over input and output for EDAM Format (term) (quoted as needed)",
    )
    dataFormatID: str | None = Field(  # noqa: N815
        default=None,
        description="Exact search over input and output for EDAM Format (ID) (**must** be quoted)",
    )

    # --- EDAM data and format over input only ---
    input: str | None = Field(
        default=None,
        description="Fuzzy search over input for EDAM Data and Format (term) (quoted as needed)",
    )
    inputID: str | None = Field(  # noqa: N815
        default=None,
        description="Exact search over input for EDAM Data and Format (ID) (**must** be quoted)",
    )
    inputDataType: str | None = Field(  # noqa: N815
        default=None,
        description="Fuzzy search over input for EDAM Data (term) (quoted as needed)",
    )
    inputDataTypeID: str | None = Field(  # noqa: N815
        default=None,
        description="Exact search over input for EDAM Data (ID) (**must** be quoted)",
    )
    inputDataFormat: str | None = Field(  # noqa: N815
        default=None,
        description="Fuzzy search over input for EDAM Format (term) (quoted as needed)",
    )
    inputDataFormatID: str | None = Field(  # noqa: N815
        default=None,
        description="Exact search over input for EDAM Format (ID) (**must** be quoted)",
    )

    # --- EDAM data and format over output only ---
    output: str | None = Field(
        default=None,
        description="Fuzzy search over output for EDAM Data and Format (term) (quoted as needed)",
    )
    outputID: str | None = Field(  # noqa: N815
        default=None,
        description="Exact search over output for EDAM Data and Format (ID) (**must** be quoted)",
    )
    outputDataType: str | None = Field(  # noqa: N815
        default=None,
        description="Fuzzy search over output for EDAM Data (term) (quoted as needed)",
    )
    outputDataTypeID: str | None = Field(  # noqa: N815
        default=None,
        description="Exact search over output for EDAM Data (ID) (**must** be quoted)",
    )
    outputDataFormat: str | None = Field(  # noqa: N815
        default=None,
        description="Fuzzy search over output for EDAM Format (term) (quoted as needed)",
    )
    outputDataFormatID: str | None = Field(  # noqa: N815
        default=None,
        description="Exact search over output for EDAM Format (ID) (**must** be quoted)",
    )

    # --- Tool classification and availability ---
    toolType: str | None = Field(  # noqa: N815
        default=None,
        description="Exact search for tool type",
    )
    collectionID: str | None = Field(  # noqa: N815
        default=None,
        description="Exact search for tool collection (normally quoted)",
    )
    maturity: str | None = Field(
        default=None,
        description="Exact search for tool maturity",
    )
    operatingSystem: str | None = Field(  # noqa: N815
        default=None,
        description="Exact search for tool operating system",
    )
    language: str | None = Field(
        default=None,
        description="Exact search for programming language",
    )
    cost: str | None = Field(
        default=None,
        description="Exact search for cost",
    )
    license: str | None = Field(
        default=None,
        description="Exact search for software or data usage license (quoted as needed)",
    )
    accessibility: str | None = Field(
        default=None,
        description="Exact search for tool accessibility",
    )

    # --- Credits ---
    credit: str | None = Field(
        default=None,
        description="Fuzzy search over credit (name, email, URL, ORCID iD, type of entity, type of role and note)",
    )
    creditName: str | None = Field(  # noqa: N815
        default=None,
        description="Exact search for name of credited entity",
    )
    creditTypeRole: str | None = Field(  # noqa: N815
        default=None,
        description="Exact search for role of credited entity",
    )
    creditTypeEntity: str | None = Field(  # noqa: N815
        default=None,
        description="Exact search for type of credited entity",
    )
    creditOrcidID: str | None = Field(  # noqa: N815
        default=None,
        description="Exact search for ORCID iD of credited entity (**must** be quoted)",
    )

    # --- Publications ---
    publication: str | None = Field(
        default=None,
        description=(
            "Fuzzy search over publication (DOI, PMID, PMCID, publication type and tool version) (quoted as needed)"
        ),
    )
    publicationID: str | None = Field(  # noqa: N815
        default=None,
        description="Exact search for publication ID (DOI, PMID or PMCID) (**must** be quoted)",
    )
    publicationType: str | None = Field(  # noqa: N815
        default=None,
        description="Exact search for publication type",
    )
    publicationVersion: str | None = Field(  # noqa: N815
        default=None,
        description="Exact search for tool version associated with a publication (**must** be quoted)",
    )

    # --- Links, documentation and downloads ---
    link: str | None = Field(
        default=None,
        description="Fuzzy search over general link (URL, type and note) (quote as needed)",
    )
    linkType: str | None = Field(  # noqa: N815
        default=None,
        description="Exact search for type of information found at a link",
    )
    documentation: str | None = Field(
        default=None,
        description="Fuzzy search over documentation link (URL, type and note) (quote as needed)",
    )
    documentationType: str | None = Field(  # noqa: N815
        default=None,
        description="Exact search for type of documentation",
    )
    download: str | None = Field(
        default=None,
        description="Fuzzy search over download link (URL, type, version and note) (quote as needed)",
    )
    downloadType: str | None = Field(  # noqa: N815
        default=None,
        description="Exact search for type of download",
    )
    downloadVersion: str | None = Field(  # noqa: N815
        default=None,
        description="Exact search for tool version associated with a download (**must** be quoted)",
    )

    # --- Alternate tool identifiers ---
    otherID: str | None = Field(  # noqa: N815
        default=None,
        description="Fuzzy search over alternate tool IDs (ID value, type of ID and version)",
    )
    otherIDValue: str | None = Field(  # noqa: N815
        default=None,
        description="Exact search for value of alternate tool ID (**must** be quoted)",
    )
    otherIDType: str | None = Field(  # noqa: N815
        default=None,
        description="Exact search for type of alternate tool ID",
    )
    otherIDVersion: str | None = Field(  # noqa: N815
        default=None,
        description="Exact search for tool version associated with an alternate ID (**must** be quoted)",
    )

    # --- Bookkeeping ---
    question_uuid: str | None = Field(
        default_factory=lambda: str(uuid.uuid4()),
        description="Unique identifier for the question.",
    )

API Calling: Web APIs

BLAST

BlastFetcher

_fetch_results(rid, question_uuid, retries=10000)

_submit_query(request_data)

fetch_results(query_models, retries=20)

BlastInterpreter

summarise_results(question, conversation_factory, response_text)

BlastQueryBuilder

create_runnable(query_parameters, conversation)

parameterise_query(question, conversation)

BlastQueryParameters

OncoKB

OncoKBFetcher

fetch_results(request_data, retries=3)

OncoKBInterpreter

summarise_results(question, conversation_factory, response_text)

OncoKBQueryBuilder

create_runnable(query_parameters, conversation)

parameterise_query(question, conversation)

bio.tools

BioToolsFetcher

__init__(api_token='demo')

fetch_results(request_data, retries=3)

BioToolsInterpreter

summarise_results(question, conversation_factory, response_text)

BioToolsQueryBuilder

create_runnable(query_parameters, conversation)

parameterise_query(question, conversation)

BioToolsQueryParameters

`BlastFetcher`

`_fetch_results(rid, question_uuid, retries=10000)`

`_submit_query(request_data)`

`fetch_results(query_models, retries=20)`

`BlastInterpreter`

`summarise_results(question, conversation_factory, response_text)`

`BlastQueryBuilder`

`create_runnable(query_parameters, conversation)`

`parameterise_query(question, conversation)`

`BlastQueryParameters`

`OncoKBFetcher`

`fetch_results(request_data, retries=3)`

`OncoKBInterpreter`

`summarise_results(question, conversation_factory, response_text)`

`OncoKBQueryBuilder`

`create_runnable(query_parameters, conversation)`

`parameterise_query(question, conversation)`

`BioToolsFetcher`

`__init__(api_token='demo')`

`fetch_results(request_data, retries=3)`

`BioToolsInterpreter`

`summarise_results(question, conversation_factory, response_text)`

`BioToolsQueryBuilder`

`create_runnable(query_parameters, conversation)`

`parameterise_query(question, conversation)`

`BioToolsQueryParameters`