release 1.0.0

banjuede committed Nov 6, 2023
1 parent 4ce9c2a commit 48f9cf3
Showing 18 changed files with 294 additions and 6 deletions.
5 changes: 5 additions & 0 deletions examples/0.prepare/run.sh
@@ -0,0 +1,5 @@
# Download the example models from Hugging Face.
cd ../data
git clone --depth 1 https://huggingface.co/facebook/opt-125m
git clone --depth 1 https://huggingface.co/bigscience/bloom-560m
cd ../0.prepare
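
As an alternative to git clone, the same checkpoints can be fetched from Python with the huggingface_hub library. This is an optional sketch (not part of the committed script), assuming the huggingface_hub package is installed:

from huggingface_hub import snapshot_download

# Optional alternative to the git clone commands above.
snapshot_download(repo_id="facebook/opt-125m", local_dir="../data/opt-125m")
snapshot_download(repo_id="bigscience/bloom-560m", local_dir="../data/bloom-560m")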
7 changes: 7 additions & 0 deletions examples/1.import/run.sh
@@ -0,0 +1,7 @@
#!/bin/bash
# Clean up any existing model named opt-125m.
lms del --model_name opt-125m
# Import it into LMS.
lms import --model_path ../data/opt-125m
# List the imported models.
lms list
21 changes: 21 additions & 0 deletions examples/2.deploy/custom_load_pipeline/pipeline_loader_example.py
@@ -0,0 +1,21 @@
from transformers import pipeline


class PipelineLoader:
    @staticmethod
    def load(model_path: str, **kwargs):
        # Custom loader hook passed to `lms deploy --infer_py`: build and return
        # the inference pipeline for the deployed model.
        print("====================")  # marker showing the custom loader was invoked
        if 'device' in kwargs:
            # Place the pipeline on the requested device (e.g. a GPU index).
            pipe = pipeline(task="text2text-generation",
                            model=model_path,
                            trust_remote_code=True,
                            device=kwargs['device'],
                            model_kwargs={"ignore_mismatched_sizes": True})
        else:
            pipe = pipeline(task="text2text-generation",
                            model=model_path,
                            trust_remote_code=True,
                            model_kwargs={"ignore_mismatched_sizes": True})
        return pipe


27 changes: 27 additions & 0 deletions examples/2.deploy/custom_load_pipeline/run.sh
@@ -0,0 +1,27 @@
#!/bin/bash

api_key=default
port=18991

# Deploy the model with the custom pipeline loader.
lms deploy --model_name opt-125m --gpu 0 --port $port --api_key $api_key --infer_py pipeline_loader_example.py

# Test prediction against the deployed endpoint.
curl -v --noproxy '*' --location "http://127.0.0.1:${port}/prediction" \
    --header "Api_key: ${api_key}" \
    --header 'Content-Type: application/json' \
    --data '{
        "messages": [
            {
                "role": "User",
                "content": "hello"
            }
        ],
        "repetition_penalty": 1.2,
        "top_k": 40,
        "top_p": 0.5,
        "temperature": 0.7,
        "max_new_tokens": 100
    }'

# Undeploy the model.
lms undeploy --model_name opt-125m
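
The same prediction request can also be issued from Python. The snippet below is a minimal sketch (not part of the committed files) that reuses the endpoint, Api_key header, and JSON payload from run.sh above, via the requests library:

import requests

# Endpoint, header, and body mirror the curl call in run.sh above.
url = "http://127.0.0.1:18991/prediction"
headers = {"Api_key": "default", "Content-Type": "application/json"}
payload = {
    "messages": [{"role": "User", "content": "hello"}],
    "repetition_penalty": 1.2,
    "top_k": 40,
    "top_p": 0.5,
    "temperature": 0.7,
    "max_new_tokens": 100,
}

response = requests.post(url, headers=headers, json=payload, timeout=60)
print(response.status_code, response.text)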
18 changes: 18 additions & 0 deletions examples/2.deploy/setting_infer_config/infer_config.json
@@ -0,0 +1,18 @@
{
    "prompt_role": {
        "User": "User",
        "Assistant": "Assistant"
    },
    "tokenizer_kwargs": {
        "padding_side": "left"
    },
    "model_kwargs": {
    },
    "generate": {
        "repetition_penalty": 1.2,
        "top_k": 40,
        "top_p": 0.5,
        "temperature": 0.7,
        "max_new_tokens": 512
    }
}
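
The sections mirror standard Hugging Face arguments: tokenizer_kwargs and model_kwargs are constructor keyword arguments, generate holds generation parameters, and prompt_role presumably maps chat roles when the service renders prompts. The snippet below is only an illustration of how such a config could be consumed with transformers; it is not the loader LMS actually uses, and the model path is an assumption pointing at the checkpoint cloned in step 0:

import json
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_PATH = "../../data/opt-125m"  # assumed path to the checkpoint from examples/0.prepare

with open("infer_config.json", encoding="utf-8") as f:
    cfg = json.load(f)

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, **cfg["tokenizer_kwargs"])
model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, **cfg["model_kwargs"])

inputs = tokenizer("hello", return_tensors="pt")
# do_sample=True so the temperature/top_p/top_k values from the config take effect.
outputs = model.generate(**inputs, do_sample=True, **cfg["generate"])
print(tokenizer.decode(outputs[0], skip_special_tokens=True))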
30 changes: 30 additions & 0 deletions examples/2.deploy/setting_infer_config/run.sh
@@ -0,0 +1,30 @@
#!/bin/bash

api_key=default
port=18991

# Deploy the model with a custom inference config.
lms deploy --model_name opt-125m --gpu 0 --port $port --api_key $api_key --infer_config infer_config.json

# Test prediction against the deployed endpoint.
curl -v --noproxy '*' --location "http://127.0.0.1:${port}/prediction" \
    --header "Api_key: ${api_key}" \
    --header 'Content-Type: application/json' \
    --data '{
        "messages": [
            {
                "role": "User",
                "content": "hello"
            }
        ],
        "repetition_penalty": 1.2,
        "top_k": 40,
        "top_p": 0.5,
        "temperature": 0.7,
        "max_new_tokens": 100
    }'

# Undeploy the model.
lms undeploy --model_name opt-125m
4 changes: 4 additions & 0 deletions examples/3.evaluate/automatic/run.sh
@@ -0,0 +1,4 @@
# Evaluate the model on the ARC and MMLU benchmarks.

lms eval --model_name opt-125m --task ARC,MMLU --output_path out.json

6 changes: 6 additions & 0 deletions examples/3.evaluate/custom/custom.csv
@@ -0,0 +1,6 @@
Question,A,B,C,D,Answer
肉牛屠宰后,胴体的哪个部位肉质较好,大腿,腹,胸,小腿,A
下列鸭品种中,产蛋量最高的品种是,高邮鸭,北京鸭,樱桃谷鸭,绍鸭,D
常见家畜中输精管不形成输精管壶腹的是,马,猪,牛,羊,B
牛的发情周期平均为,16~17天,21天,28天,19~20天,B
羊胴体中,肉质较好的部位是,胸下肉,肩胛肉,后腿肉,小腿肉,C
121 changes: 121 additions & 0 deletions examples/3.evaluate/custom/custom.py
@@ -0,0 +1,121 @@
import argparse
import csv
import json
from typing import List
import torch
import evaluate
from datasets import Dataset
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer, TextGenerationPipeline, pipeline


class CustomDataset():
    @staticmethod
    def load(data_path: str):
        # Prompt (in Chinese): "The following are single-choice questions; give the option of the correct answer directly.\nQuestion:"
        pre_prompt = "以下是单项选择题,请直接给出正确答案的选项。\n题目:"
        # "The answer is:"
        post_prompt: str = "答案是:"
        raw_data = []
        with open(data_path, encoding='utf-8') as f:
            reader = csv.reader(f)
            _ = next(reader)  # skip the header
            for row in reader:
                assert len(row) == 6
                question = row[0]
                A = row[1]
                B = row[2]
                C = row[3]
                D = row[4]
                raw_data.append({
                    'question': pre_prompt + f"{question}\nA. {A}\nB. {B}\nC. {C}\nD. {D}" + post_prompt,
                    'answer': row[5],
                })
        dataset = Dataset.from_list(raw_data)
        return dataset

    # The returned dataset is a datasets.Dataset of the form:
    # Dataset({
    #     features: ['question', 'answer'],
    #     num_rows: xxx
    # })

    @staticmethod
    def eval(predictions: List, references: List, accuracy_script_path) -> dict:
        """Calculate scores.
        Args:
            predictions (List): List of predictions of each sample.
            references (List): List of targets for each sample.
        Returns:
            dict: calculated scores.
        """
        metric = evaluate.load(accuracy_script_path)

        def first_capital_postprocess(text):
            # Use the first uppercase letter in the model output as the predicted option.
            for t in text:
                if t.isupper():
                    return t
            return ''

        def preprocess(predictions, references):
            # Map option labels to integer ids so the accuracy metric can consume them.
            mapping_to_int_dict = {
                label: idx
                for idx, label in enumerate(set(map(str, references)))
            }
            pred_set = set(predictions)
            for pred in pred_set:
                if str(pred) not in mapping_to_int_dict.keys():
                    mapping_to_int_dict[str(pred)] = len(mapping_to_int_dict)
            golds = [mapping_to_int_dict[str(gold)] for gold in references]
            preds = [mapping_to_int_dict[str(pred)] for pred in predictions]
            return {
                'predictions': preds,
                'references': golds,
            }

        predictions = list(map(first_capital_postprocess, predictions))
        scores = metric.compute(**preprocess(predictions, references))
        result = {}
        result["acc"] = round(scores['accuracy'], 2)
        return result
        # result has the form {"xxx": 0.8}, where xxx is a custom metric name
        # such as acc, f1, bleu or rouge1.


def parse_args():
    parser = argparse.ArgumentParser(description='Run an evaluation task')
    parser.add_argument('--model_path', help='model_name_or_path')
    parser.add_argument('--data_path', help='data_path')
    parser.add_argument('--output_path', help='output_path')
    args = parser.parse_args()
    return args


def infer(model_path, datalist):
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False, trust_remote_code=True)
        model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto", trust_remote_code=True)
        pipe = TextGenerationPipeline(model=model, tokenizer=tokenizer, torch_dtype=torch.float16)
    except Exception:
        # Fall back to a generic pipeline if the model cannot be loaded as a causal LM.
        pipe = pipeline("text2text-generation", model=model_path, device_map="auto", trust_remote_code=True,
                        torch_dtype=torch.float16)
    predict = []
    for text in tqdm(datalist):
        out = pipe(text, max_new_tokens=64)
        predict.append(out[0]["generated_text"][len(text):])
    return predict


def do_eval(data_path, model_path, output_path, accuracy_script_path):
    # Load the dataset.
    dataset = CustomDataset.load(data_path)
    # Run model inference to get predictions.
    predict = infer(model_path, dataset["question"])

    # Evaluate predictions against the references.
    result = CustomDataset.eval(predict, dataset["answer"], accuracy_script_path)
    results_metric = {"model": model_path, "benchmarks": [{"benchmark_name": "custom1", "metrics": result}]}
    with open(output_path, 'w') as write_f:
        write_f.write(json.dumps(results_metric, indent=4, ensure_ascii=False))
    print(result)
    return results_metric
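
The last lines of the file are not visible in this hunk, so the wiring below is a hypothetical sketch of how parse_args and do_eval might be connected for a standalone run; passing the built-in "accuracy" metric name to evaluate.load is an assumption, since accuracy_script_path is not exposed as a CLI argument above.

if __name__ == "__main__":
    # Hypothetical entry point (the actual file tail is not shown in this diff).
    args = parse_args()
    do_eval(args.data_path, args.model_path, args.output_path, "accuracy")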
4 changes: 4 additions & 0 deletions examples/3.evaluate/custom/run.sh
@@ -0,0 +1,4 @@
# Evaluate the model with a custom task script against a custom dataset.

lms eval --model_name opt-125m --task custom.py --input_path custom.csv --output_path out.json

6 changes: 6 additions & 0 deletions examples/3.evaluate/human/origin.csv
@@ -0,0 +1,6 @@
input,Expected
肉牛屠宰后,胴体的哪个部位肉质较好,大腿
下列鸭品种中,产蛋量最高的品种是,绍鸭
常见家畜中输精管不形成输精管壶腹的是,
牛的发情周期平均为,
羊胴体中,肉质较好的部位是,后腿肉
4 changes: 4 additions & 0 deletions examples/3.evaluate/human/run.sh
@@ -0,0 +1,4 @@
# Run the human-evaluation task: read inputs from origin.csv and write outputs to target.csv for manual review.

lms eval --model_name opt-125m --task human --input_path origin.csv --output_path target.csv

3 changes: 3 additions & 0 deletions examples/4.prune/sprase/run.sh
@@ -0,0 +1,3 @@
# Prune the model with sparse pruning.

lms pruning sparse --model_name alaya065-ep2-fp16 --pruned_model_path alaya_sparsed --nsamples 128 --device cpu --layer_name_start blocks.3 --layer_name_stop blocks.4
3 changes: 3 additions & 0 deletions examples/4.prune/structure/run.sh
@@ -0,0 +1,3 @@
# Prune the model with structured pruning.

lms pruning structure --model_name bloom-560m --pruned_model_path bloom-560m_structure --nsamples 128 --device cpu --model_type bloom --block_attention_layer_start 1 --block_attention_layer_end 22 --block_mlp_layer_start 1 --block_mlp_layer_end 22
3 changes: 3 additions & 0 deletions examples/5.quantize/int4/run.sh
@@ -0,0 +1,3 @@
# Quantize the model to int4 precision.

lms quantization --model_name opt-125m --int4 --quantized_model_path quantized_model
3 changes: 3 additions & 0 deletions examples/5.quantize/int8/run.sh
@@ -0,0 +1,3 @@
# Quantize the model to int8 precision.

lms quantization --model_name opt-125m --int8 --quantized_model_path quantized_model
6 changes: 3 additions & 3 deletions pyproject.toml
@@ -3,8 +3,8 @@ requires = ["setuptools"]
build-backend = "setuptools.build_meta"

[project]
-name = "lms"
-version = "0.1.0"
+name = "dc-lms"
+version = "1.0.0"
requires-python = ">=3.9.0"
dependencies = [
#====for web
@@ -48,7 +48,7 @@ dependencies = [
[tool.setuptools.packages.find]
where = ["."] # ["."] by default
include = ["lms*"] # ["*"] by default
-exclude = ["lms.tests*"] # empty by default
+exclude = ["lms.tests*","tests*"] # empty by default
namespaces = true # true by default

[project.scripts]
29 changes: 26 additions & 3 deletions setup.py
@@ -1,4 +1,27 @@
-from setuptools import setup
+from setuptools import setup, find_packages

-# https://setuptools.pypa.io/en/latest/userguide/pyproject_config.html
-setup()
+LMS_VERSION = '1.0.0'
+
+setup(
+    name='dc-lms',
+    version=LMS_VERSION,
+    packages=find_packages(),
+    include_package_data=True,
+    url='https://github.com/DataCanvasIO/LMS',
+    license='Apache License 2.0',
+    author='datacanvas',
+    description='',
+    long_description="""
+LMS(Large Model Serving) is an open source tool that provides large model services.
+See the LMS HOME https://github.com/DataCanvasIO/LMS for details.
+""",
+    long_description_content_type="text/markdown",
+    entry_points={
+        "console_scripts": ['lms = lms.web:main']
+    },
+    python_requires='>=3.9',
+    classifiers=[
+        "License :: OSI Approved :: Apache Software License",
+        "Programming Language :: Python :: 3",
+        "Programming Language :: Python :: 3.9"]
+)
