Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SDK/CLI] Fix encoding issue with non-English input #683

Merged
merged 7 commits into from
Oct 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
{"text": "Python Hello World!"}
{"text": "C Hello World!"}
{"text": "C# Hello World!"}
{"text": "C# Hello World!"}
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,4 @@ nodes:
api: chat
node_variants: {}
environment:
python_requirements_txt: requirements.txt
python_requirements_txt: requirements.txt
2 changes: 2 additions & 0 deletions src/promptflow/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,15 @@
- **pf config set**:
- Fix bug for workspace `connection.provider=azureml` doesn't work as expected.
- [SDK/CLI] Fix the bug that using sdk/cli to submit batch run did not display the log correctly.
- [SDK/CLI] Fix encoding issues when input is non-English with `pf flow test`.

### Improvements

- [SDK/CLI] Experience improvements in `pf run visualize` page:
- Add column status.
- Support opening flow file by clicking run id.


## 0.1.0b7.post1 (2023.09.28)

### Bug Fixed
Expand Down
10 changes: 5 additions & 5 deletions src/promptflow/promptflow/_sdk/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,7 @@ def in_jupyter_notebook() -> bool:


def render_jinja_template(template_path, *, trim_blocks=True, keep_trailing_newline=True, **kwargs):
with open(template_path, "r") as f:
with open(template_path, "r", encoding=DEFAULT_ENCODING) as f:
template = Template(f.read(), trim_blocks=trim_blocks, keep_trailing_newline=keep_trailing_newline)
return template.render(**kwargs)

Expand Down Expand Up @@ -430,7 +430,7 @@ def _sanitize_python_variable_name(name: str):


def _get_additional_includes(yaml_path):
with open(yaml_path, "r") as f:
with open(yaml_path, "r", encoding=DEFAULT_ENCODING) as f:
flow_dag = yaml.safe_load(f)
return flow_dag.get("additional_includes", [])

Expand Down Expand Up @@ -661,7 +661,7 @@ def generate_flow_tools_json(
"""
flow_directory = Path(flow_directory).resolve()
# parse flow DAG
with open(flow_directory / DAG_FILE_NAME, "r") as f:
with open(flow_directory / DAG_FILE_NAME, "r", encoding=DEFAULT_ENCODING) as f:
data = yaml.safe_load(f)
tools = [] # List[Tuple[source_file, tool_type]]
used_packages = set()
Expand Down Expand Up @@ -829,7 +829,7 @@ def refresh_connections_dir(connection_spec_files, connection_template_yamls):
if connection_spec_files and connection_template_yamls:
for connection_name, content in connection_spec_files.items():
file_name = connection_name + ".spec.json"
with open(connections_dir / file_name, "w") as f:
with open(connections_dir / file_name, "w", encoding=DEFAULT_ENCODING) as f:
json.dump(content, f, indent=2)

# use YAML to dump template file in order to keep the comments
Expand All @@ -838,5 +838,5 @@ def refresh_connections_dir(connection_spec_files, connection_template_yamls):
for connection_name, content in connection_template_yamls.items():
yaml_data = yaml.load(content)
file_name = connection_name + ".template.yaml"
with open(connections_dir / file_name, "w") as f:
with open(connections_dir / file_name, "w", encoding=DEFAULT_ENCODING) as f:
yaml.dump(yaml_data, f)
9 changes: 5 additions & 4 deletions src/promptflow/promptflow/_sdk/operations/_run_submitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

from promptflow._sdk._constants import (
DAG_FILE_NAME,
DEFAULT_ENCODING,
DEFAULT_VAR_ID,
INPUTS,
NODE,
Expand Down Expand Up @@ -59,7 +60,7 @@ def _load_flow_dag(flow_path: Path):
if not flow_path.exists():
raise FileNotFoundError(f"Flow file {flow_path} not found")

with open(flow_path, "r") as f:
with open(flow_path, "r", encoding=DEFAULT_ENCODING) as f:
flow_dag = yaml.safe_load(f)
return flow_path, flow_dag

Expand Down Expand Up @@ -102,7 +103,7 @@ def overwrite_variant(flow_path: Path, tuning_node: str = None, variant: str = N
except KeyError as e:
raise KeyError("Failed to overwrite tuning node with variant") from e

with open(flow_path, "w") as f:
with open(flow_path, "w", encoding=DEFAULT_ENCODING) as f:
yaml.safe_dump(flow_dag, f)


Expand Down Expand Up @@ -148,14 +149,14 @@ def overwrite_connections(flow_path: Path, connections: dict, working_dir: PathL
raise InvalidFlowError(f"Connection with name {c} not found in node {node_name}'s inputs")
node[INPUTS][c] = v

with open(flow_path, "w") as f:
with open(flow_path, "w", encoding=DEFAULT_ENCODING) as f:
yaml.safe_dump(flow_dag, f)


def remove_additional_includes(flow_path: Path):
flow_path, flow_dag = _load_flow_dag(flow_path=flow_path)
flow_dag.pop("additional_includes", None)
with open(flow_path, "w") as f:
with open(flow_path, "w", encoding=DEFAULT_ENCODING) as f:
yaml.safe_dump(flow_dag, f)


Expand Down
3 changes: 2 additions & 1 deletion src/promptflow/promptflow/contracts/flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import yaml

from promptflow.exceptions import ErrorTarget
from .._sdk._constants import DEFAULT_ENCODING

from .._utils.dataclass_serializer import serialize
from .._utils.utils import try_import
Expand Down Expand Up @@ -522,7 +523,7 @@ def _resolve_working_dir(flow_file: Path, working_dir=None) -> Path:
def from_yaml(flow_file: Path, working_dir=None) -> "Flow":
"""Load flow from yaml file."""
working_dir = Flow._resolve_working_dir(flow_file, working_dir)
with open(working_dir / flow_file, "r") as fin:
with open(working_dir / flow_file, "r", encoding=DEFAULT_ENCODING) as fin:
flow = Flow.deserialize(yaml.safe_load(fin))
flow._set_tool_loader(working_dir)
return flow
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,3 +139,7 @@ def test_pf_flow_with_aggregation(self):
result = _client._flows._test(flow=flow_path, inputs=inputs)
assert "calculate_accuracy" in result.node_run_infos
assert result.run_info.metrics == {"accuracy": 1.0}

def test_pf_test_with_non_english_input(self):
result = _client.test(flow=f"{FLOWS_DIR}/flow_with_non_english_input")
assert result["output"] == "Hello 日本語"
9 changes: 9 additions & 0 deletions src/promptflow/tests/sdk_cli_test/unittests/test_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from promptflow._sdk._pf_client import PFClient
from promptflow._sdk._run_functions import create_yaml_run
from promptflow._sdk.entities import Run
from promptflow._sdk.operations._local_storage_operations import LocalStorageOperations
from promptflow._sdk.operations._run_submitter import RunSubmitter, overwrite_variant, variant_overwrite_context

PROMOTFLOW_ROOT = Path(__file__) / "../../../.."
Expand Down Expand Up @@ -177,3 +178,11 @@ def test_submit(self, mock_update):

# Check if Run.update method was called
mock_update.assert_called_once()

def test_flow_run_with_non_english_inputs(self, pf):
flow_path = f"{FLOWS_DIR}/flow_with_non_english_input"
data = f"{FLOWS_DIR}/flow_with_non_english_input/data.jsonl"
run = pf.run(flow=flow_path, data=data, column_mapping={"text": "${data.text}"})
local_storage = LocalStorageOperations(run=run)
outputs = local_storage.load_outputs()
assert outputs == {"output": ["Hello 123 日本語", "World 123 日本語"]}
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
{"text": "Hello 123 日本語"}
{"text": "World 123 日本語"}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
$schema: https://azuremlschemas.azureedge.net/promptflow/latest/Flow.schema.json
0mza987 marked this conversation as resolved.
Show resolved Hide resolved
inputs:
text:
type: string
default: Hello 日本語
outputs:
output:
type: string
reference: ${hello_prompt.output}
nodes:
- name: hello_prompt
type: prompt
source:
type: code
path: hello.jinja2
inputs:
text: ${inputs.text}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{{text}}
Loading