diff --git a/samples/cpp/beam_search_causal_lm/beam_search_causal_lm.cpp b/samples/cpp/beam_search_causal_lm/beam_search_causal_lm.cpp
index 236b31b351..fc18fa8e0c 100644
--- a/samples/cpp/beam_search_causal_lm/beam_search_causal_lm.cpp
+++ b/samples/cpp/beam_search_causal_lm/beam_search_causal_lm.cpp
@@ -17,6 +17,7 @@ int main(int argc, char* argv[]) try {
     config.max_new_tokens = 20;
     config.num_beam_groups = 3;
     config.num_beams = 15;
+    config.diversity_penalty = 1.0f;
     config.num_return_sequences = config.num_beams;
        
     // Since the streamer is set, the results will
diff --git a/samples/python/beam_search_causal_lm/beam_search_causal_lm.py b/samples/python/beam_search_causal_lm/beam_search_causal_lm.py
index 16b8b76175..4e2430a47f 100755
--- a/samples/python/beam_search_causal_lm/beam_search_causal_lm.py
+++ b/samples/python/beam_search_causal_lm/beam_search_causal_lm.py
@@ -19,6 +19,7 @@ def main():
     config.max_new_tokens = 20
     config.num_beam_groups = 3
     config.num_beams = 15
+    config.diversity_penalty = 1
     config.num_return_sequences = config.num_beams
 
     beams = pipe.generate(args.prompts, config)
diff --git a/src/cpp/include/openvino/genai/generation_config.hpp b/src/cpp/include/openvino/genai/generation_config.hpp
index 4ea75e94c5..164ff29131 100644
--- a/src/cpp/include/openvino/genai/generation_config.hpp
+++ b/src/cpp/include/openvino/genai/generation_config.hpp
@@ -93,15 +93,22 @@ class OPENVINO_GENAI_EXPORTS GenerationConfig {
     bool echo = false;
     size_t logprobs = 0;
 
+    // EOS special token
+    int64_t eos_token_id = -1;
     std::set<std::string> stop_strings;
     // Default setting in vLLM (and OpenAI API) is not to include stop string in the output
     bool include_stop_str_in_output = false;
     std::set<int64_t> stop_token_ids;
 
+    // penalties (not used in beam search)
+    float repetition_penalty = 1.0f;
+    float presence_penalty = 0.0;
+    float frequency_penalty = 0.0f;
+
     // Beam search specific
     size_t num_beam_groups = 1;
     size_t num_beams = 1;
-    float diversity_penalty = 1.0f;
+    float diversity_penalty = 0.0f;
     float length_penalty = 1.0f;
     size_t num_return_sequences = 1;
     size_t no_repeat_ngram_size = std::numeric_limits<size_t>::max();
@@ -112,9 +119,6 @@ class OPENVINO_GENAI_EXPORTS GenerationConfig {
     float top_p = 1.0f;
     size_t top_k = std::numeric_limits<size_t>::max();
     bool do_sample = false;
-    float repetition_penalty = 1.0f;
-    float presence_penalty = 0.0;
-    float frequency_penalty = 0.0f;
     size_t rng_seed = 0;
 
     // Assisting generation parameters
@@ -122,9 +126,6 @@ class OPENVINO_GENAI_EXPORTS GenerationConfig {
     size_t num_assistant_tokens = 0;
     size_t max_ngram_size = 0;
 
-    // EOS special token
-    int64_t eos_token_id = -1;
-
     std::optional<AdapterConfig> adapters;
 
     /** @brief sets eos_token_id to tokenizer_eos_token_id if eos_token_id is less than 0.
@@ -136,11 +137,13 @@ class OPENVINO_GENAI_EXPORTS GenerationConfig {
     bool is_greedy_decoding() const;
     bool is_beam_search() const;
     bool is_multinomial() const;
-    OPENVINO_DEPRECATED("Please, use `is_assisting_generation()` instead of `is_speculative_decoding()`. This method will be removed in 2025.0.0 release")
-    bool is_speculative_decoding() const;
     bool is_assisting_generation() const;
     bool is_prompt_lookup() const;
-    void update_generation_config(const ov::AnyMap& config_map);
+
+    OPENVINO_DEPRECATED("Please, use `is_assisting_generation()` instead of `is_speculative_decoding()`. This method will be removed in 2026.0.0 release")
+    bool is_speculative_decoding() const;
+
+    void update_generation_config(const ov::AnyMap& properties);
 
     template <typename... Properties>
     util::EnableIfAllStringAny<void, Properties...> update_generation_config(Properties&&... properties) {
@@ -187,8 +190,13 @@ static constexpr ov::Property<float> assistant_confidence_threshold{"assistant_c
 static constexpr ov::Property<size_t> num_assistant_tokens{"num_assistant_tokens"};
 
 // Predefined Configs
+
+OPENVINO_DEPRECATED("Please, use individual parameters instead of predefined configs. This method will be removed in 2026.0.0 release")
 OPENVINO_GENAI_EXPORTS GenerationConfig beam_search();
+OPENVINO_DEPRECATED("Please, use individual parameters instead of predefined configs. This method will be removed in 2026.0.0 release")
 OPENVINO_GENAI_EXPORTS GenerationConfig greedy();
+OPENVINO_DEPRECATED("Please, use individual parameters instead of predefined configs. This method will be removed in 2026.0.0 release")
 OPENVINO_GENAI_EXPORTS GenerationConfig multinomial();
+
 }  // namespace genai
 }  // namespace ov
diff --git a/src/cpp/src/generation_config.cpp b/src/cpp/src/generation_config.cpp
index 4ff184547e..59be603fd9 100644
--- a/src/cpp/src/generation_config.cpp
+++ b/src/cpp/src/generation_config.cpp
@@ -24,6 +24,7 @@ GenerationConfig::GenerationConfig(const std::filesystem::path& json_path) {
 
     nlohmann::json data = nlohmann::json::parse(f);
 
+    read_json_param(data, "eos_token_id", eos_token_id);
     read_json_param(data, "max_new_tokens", max_new_tokens);
     read_json_param(data, "max_length", max_length);
     // note that ignore_eos is not present in HF GenerationConfig
@@ -32,28 +33,40 @@ GenerationConfig::GenerationConfig(const std::filesystem::path& json_path) {
     read_json_param(data, "stop_strings", stop_strings);
     // note that include_stop_str_in_output is not present in HF GenerationConfig
     read_json_param(data, "include_stop_str_in_output", include_stop_str_in_output);
-    // note that stop_token_ids is not present in HF GenerationConfig
-    read_json_param(data, "stop_token_ids", stop_token_ids);
+    // note that stop_token_ids is not present in HF GenerationConfig, but some generation_config.json files define
+    // multiple eos_token_id values (e.g. https://huggingface.co/OpenGVLab/InternVL2-4B/blob/main/generation_config.json),
+    // so we need to read them as 'stop_token_ids'
+    std::vector<int64_t> ordered_stop_token_ids;
+    read_json_param(data, "eos_token_id", ordered_stop_token_ids);
+
+    if (!ordered_stop_token_ids.empty()) {
+        for (int64_t stop_token_id : ordered_stop_token_ids)
+            stop_token_ids.insert(stop_token_id);
+
+        if (eos_token_id == -1) {
+            eos_token_id = ordered_stop_token_ids[0];
+        }
+    }
+
+    // note that echo is not present in HF GenerationConfig
+    read_json_param(data, "echo", echo);
+    // note that logprobs is not present in HF GenerationConfig
+    read_json_param(data, "logprobs", logprobs);
+
+    // penalties
+    read_json_param(data, "repetition_penalty", repetition_penalty);
+    // note that frequency_penalty is not present in HF GenerationConfig
+    read_json_param(data, "frequency_penalty", frequency_penalty);
+    // note that presence_penalty is not present in HF GenerationConfig
+    read_json_param(data, "presence_penalty", presence_penalty);
+
+    // beam search
     read_json_param(data, "num_beam_groups", num_beam_groups);
     read_json_param(data, "num_beams", num_beams);
     read_json_param(data, "diversity_penalty", diversity_penalty);
     read_json_param(data, "length_penalty", length_penalty);
     read_json_param(data, "num_return_sequences", num_return_sequences);
     read_json_param(data, "no_repeat_ngram_size", no_repeat_ngram_size);
-    read_json_param(data, "temperature", temperature);
-    read_json_param(data, "top_p", top_p);
-    read_json_param(data, "top_k", top_k);
-    read_json_param(data, "do_sample", do_sample);
-    read_json_param(data, "repetition_penalty", repetition_penalty);
-    read_json_param(data, "eos_token_id", eos_token_id);
-    // note that echo is not present in HF GenerationConfig
-    read_json_param(data, "echo", echo);
-    // note that logprobs is not present in HF GenerationConfig
-    read_json_param(data, "logprobs", logprobs);
-
-    // append EOS to stop_token_ids
-    if (eos_token_id != -1)
-        set_eos_token_id(eos_token_id);
 
     if (data.contains("early_stopping")) {
         auto field_type = data["early_stopping"].type();
@@ -65,6 +78,21 @@ GenerationConfig::GenerationConfig(const std::filesystem::path& json_path) {
             stop_criteria = StopCriteria::HEURISTIC;
         }
     }
+
+    // multinomial
+    read_json_param(data, "do_sample", do_sample);
+    read_json_param(data, "temperature", temperature);
+    read_json_param(data, "top_p", top_p);
+    read_json_param(data, "top_k", top_k);
+
+    // assistant generation
+    read_json_param(data, "assistant_confidence_threshold", assistant_confidence_threshold);
+    read_json_param(data, "num_assistant_tokens", num_assistant_tokens);
+    read_json_param(data, "max_ngram_size", max_ngram_size);
+
+    // append EOS to stop_token_ids
+    if (eos_token_id != -1)
+        set_eos_token_id(eos_token_id);
 }
 
 void GenerationConfig::set_eos_token_id(size_t tokenizer_eos_token_id) {
@@ -79,35 +107,50 @@ void GenerationConfig::set_eos_token_id(size_t tokenizer_eos_token_id) {
     stop_token_ids.insert(eos_token_id);
 }
 
-void GenerationConfig::update_generation_config(const ov::AnyMap& config_map) {
+void GenerationConfig::update_generation_config(const ov::AnyMap& properties) {  // each field below is read from 'properties' under the key of the same name
     using utils::read_anymap_param;
 
-    read_anymap_param(config_map, "max_new_tokens", max_new_tokens);
-    read_anymap_param(config_map, "max_length", max_length);
-    read_anymap_param(config_map, "ignore_eos", ignore_eos);
-    read_anymap_param(config_map, "min_new_tokens", min_new_tokens);
-    read_anymap_param(config_map, "stop_strings", stop_strings);
-    read_anymap_param(config_map, "include_stop_str_in_output", include_stop_str_in_output);
-    read_anymap_param(config_map, "stop_token_ids", stop_token_ids);
-    read_anymap_param(config_map, "num_beam_groups", num_beam_groups);
-    read_anymap_param(config_map, "num_beams", num_beams);
-    read_anymap_param(config_map, "diversity_penalty", diversity_penalty);
-    read_anymap_param(config_map, "length_penalty", length_penalty);
-    read_anymap_param(config_map, "num_return_sequences", num_return_sequences);
-    read_anymap_param(config_map, "no_repeat_ngram_size", no_repeat_ngram_size);
-    read_anymap_param(config_map, "stop_criteria", stop_criteria);
-    read_anymap_param(config_map, "temperature", temperature);
-    read_anymap_param(config_map, "top_p", top_p);
-    read_anymap_param(config_map, "top_k", top_k);
-    read_anymap_param(config_map, "do_sample", do_sample);
-    read_anymap_param(config_map, "repetition_penalty", repetition_penalty);
-    read_anymap_param(config_map, "eos_token_id", eos_token_id);
-    read_anymap_param(config_map, "echo", echo);
-    read_anymap_param(config_map, "logprobs", logprobs);
-    read_anymap_param(config_map, "adapters", adapters);
+    // stop conditions: EOS token, length limits, stop strings and stop token ids
+    read_anymap_param(properties, "eos_token_id", eos_token_id);
+    read_anymap_param(properties, "max_new_tokens", max_new_tokens);
+    read_anymap_param(properties, "max_length", max_length);
+    read_anymap_param(properties, "ignore_eos", ignore_eos);
+    read_anymap_param(properties, "min_new_tokens", min_new_tokens);
+    read_anymap_param(properties, "stop_strings", stop_strings);
+    read_anymap_param(properties, "include_stop_str_in_output", include_stop_str_in_output);
+    read_anymap_param(properties, "stop_token_ids", stop_token_ids);
+
+    // generic
+    read_anymap_param(properties, "echo", echo);
+    read_anymap_param(properties, "logprobs", logprobs);
+    read_anymap_param(properties, "num_return_sequences", num_return_sequences);
+    read_anymap_param(properties, "adapters", adapters);
 
+    // penalties (validate() rejects non-default values for these when beam search is selected)
+    read_anymap_param(properties, "frequency_penalty", frequency_penalty);
+    read_anymap_param(properties, "presence_penalty", presence_penalty);
+    read_anymap_param(properties, "repetition_penalty", repetition_penalty);
+
+    // beam search
+    read_anymap_param(properties, "num_beam_groups", num_beam_groups);
+    read_anymap_param(properties, "num_beams", num_beams);
+    read_anymap_param(properties, "diversity_penalty", diversity_penalty);
+    read_anymap_param(properties, "length_penalty", length_penalty);
+    read_anymap_param(properties, "stop_criteria", stop_criteria);
+    read_anymap_param(properties, "no_repeat_ngram_size", no_repeat_ngram_size);
+
+    // multinomial sampling
+    read_anymap_param(properties, "do_sample", do_sample);
+    read_anymap_param(properties, "temperature", temperature);
+    read_anymap_param(properties, "top_p", top_p);
+    read_anymap_param(properties, "top_k", top_k);
     // TODO: add support of 'generator' property similar to Image generation
-    read_anymap_param(config_map, "rng_seed", rng_seed);
+    read_anymap_param(properties, "rng_seed", rng_seed);
+
+    // assistant generation (the parameters inspected by is_assisting_generation() and is_prompt_lookup())
+    read_anymap_param(properties, "assistant_confidence_threshold", assistant_confidence_threshold);
+    read_anymap_param(properties, "num_assistant_tokens", num_assistant_tokens);
+    read_anymap_param(properties, "max_ngram_size", max_ngram_size);
 }
 
 size_t GenerationConfig::get_max_new_tokens(size_t prompt_length) const {
@@ -136,69 +179,94 @@ bool GenerationConfig::is_speculative_decoding() const {
 }
 
 bool GenerationConfig::is_assisting_generation() const {
-    return (assistant_confidence_threshold > 0 || num_assistant_tokens > 0);
+    return assistant_confidence_threshold > 0 || num_assistant_tokens > 0;  // true when either assistant parameter is positive
 }
 
 bool GenerationConfig::is_prompt_lookup() const {
-    return (max_ngram_size > 0 && num_assistant_tokens > 0);
+    return max_ngram_size > 0 && num_assistant_tokens > 0;  // true only when both the n-gram size and the assistant token count are positive
 }
 
 void GenerationConfig::validate() const {
+    // Checks that the configured parameters are consistent with each other and with the selected decoding mode.
+    OPENVINO_ASSERT(num_return_sequences > 0, "num_return_sequences must be greater than 0");
+
+    // Stop conditions
     OPENVINO_ASSERT(eos_token_id == -1 || stop_token_ids.find(eos_token_id) != stop_token_ids.end(),
         "'stop_token_ids' must contain 'eos_token_id'. Please, call 'set_eos_token_id' with 'eos_token_id' value");
 
-    OPENVINO_ASSERT(!do_sample || num_beams == 1, 
-                    "Beam search with sampling is not supported yet. "
-                    "Please either set do_sample=false to use beam search "
-                    "or set num_beams=1 if you with to use multinomial sampling.");
-    OPENVINO_ASSERT(num_return_sequences > 0, "num_return_sequences must be greater than 0");
+    // 'stop_token_ids' is an ordered std::set, so its first element is the smallest and the only one that needs checking
+    const bool has_negative_stop_token_id = !stop_token_ids.empty() && *stop_token_ids.begin() < 0;
+    OPENVINO_ASSERT(!has_negative_stop_token_id,
+                    "'stop_token_ids' must be non-negative, but it contains a value ", *stop_token_ids.begin());
+
+    OPENVINO_ASSERT(!ignore_eos || max_new_tokens != SIZE_MAX || max_length != SIZE_MAX,
+                    "ignore_eos is true, in this case either 'max_new_tokens', or 'max_length' should be defined.");
+
+    OPENVINO_ASSERT(eos_token_id != -1 || !stop_token_ids.empty() || !stop_strings.empty() || max_new_tokens != SIZE_MAX || max_length != SIZE_MAX,
+                    "Either 'eos_token_id', or 'stop_token_ids', or 'stop_strings', or 'max_new_tokens', or 'max_length' should be defined.");
+
     OPENVINO_ASSERT(max_new_tokens > 0 || (max_new_tokens == 0 && echo), "'max_new_tokens' must be greater than 0, if `echo` is set, 0 is also accepted");
     OPENVINO_ASSERT(min_new_tokens <= max_new_tokens, "min_new_tokens must be less or equal max_new_tokens");
-    OPENVINO_ASSERT(
-        num_beams % num_beam_groups == 0,
-        "number of beams should be divisible by number of groups"
-    );
-    
-    // max_new_tokens has priority over max_length
-    // if max_new_tokens is defined no need to check max_length
-    OPENVINO_ASSERT(max_new_tokens != SIZE_MAX ||  max_length > 0, 
-                    "'max_length' must be greater than 0 or 'max_new_tokens' should be defined");
-
-    OPENVINO_ASSERT(!do_sample || top_k > 0,
-                    "top_k must be a strictly positive, but got ",
-                    top_k);
-    OPENVINO_ASSERT(!do_sample || (top_p > 0 && top_p <= 1.0f),
-                    "top_p must be a positive float > 0 and < 1, but got ",
-                    top_p);
-    OPENVINO_ASSERT(!do_sample || temperature > 0,
-                    "Temperature must be a strictly positive float, but got ",
-                    temperature);
-
-    OPENVINO_ASSERT(repetition_penalty > 0,
-                    "Repetition penalty must be a strictly positive float, but got ",
-                    repetition_penalty);
-    
-    OPENVINO_ASSERT(!ignore_eos || max_new_tokens != SIZE_MAX || max_length != SIZE_MAX,
-                    "ignore_eos == true, in this case either 'max_new_tokens', or 'max_length' should be defined.");
 
-    OPENVINO_ASSERT(eos_token_id != -1 || max_new_tokens != SIZE_MAX || max_length != SIZE_MAX,
-                    "Either 'eos_token_id', or 'max_new_tokens', or 'max_length' should be defined.");
+    // Sampling strategies
+
+    OPENVINO_ASSERT(num_return_sequences == 1 || (is_multinomial() || is_beam_search()), 
+        "'num_return_sequences' can be more than 1 only in case of beam search or multinomial sampling, but got ", num_return_sequences);
+
+    // generic penalties, but not supported by beam search currently
+    if (!is_beam_search()) {
+        OPENVINO_ASSERT(frequency_penalty >= -2.0f && frequency_penalty <= 2.0f, "'frequency_penalty' penalty must be within [-2.0; 2.0], but got ", frequency_penalty);
+        OPENVINO_ASSERT(presence_penalty >= -2.0f && presence_penalty <= 2.0f, "'presence_penalty' penalty must be within [-2.0; 2.0], but got ", presence_penalty);
+        OPENVINO_ASSERT(repetition_penalty > 0.0f, "'repetition_penalty' must be a strictly positive float, but got ", repetition_penalty);
+    } else {
+        OPENVINO_ASSERT(frequency_penalty == 0.0f, "'frequency_penalty' is not currently supported by beam search and should be 0.0f, but got ", frequency_penalty);
+        OPENVINO_ASSERT(presence_penalty == 0.0f, "'presence_penalty' is not currently supported by beam search and should be 0.0f, but got ", presence_penalty);
+        OPENVINO_ASSERT(repetition_penalty == 1.0f, "'repetition_penalty' is not currently supported by beam search and should be 1.0f, but got ", repetition_penalty);
+    }
+
+    if (is_multinomial()) {
+        OPENVINO_ASSERT(top_k >= 0, "When 'do_sample' is true, top_k must be a non-negative, but got ", top_k);  // NOTE(review): top_k is size_t, so this check can never fail
+        OPENVINO_ASSERT(top_p > 0 && top_p <= 1.0f, "When 'do_sample' is true, top_p must be a positive float > 0.0 and <= 1.0, but got ", top_p);
+        OPENVINO_ASSERT(temperature > 0, "When 'do_sample' is true, temperature must be a strictly positive float, but got ", temperature);
+    } else {
+        // parameters requiring multinomial
+        OPENVINO_ASSERT(top_k == std::numeric_limits<size_t>::max(), "When 'do_sample' is false, top_k must be max of size_t, but got ", top_k);
+        OPENVINO_ASSERT(top_p == 1.0f, "When 'do_sample' is false, top_p must be 1.0f, but got ", top_p);
+        OPENVINO_ASSERT(temperature == 1.0f, "When 'do_sample' is false, temperature must be a 1.0f, but got ", temperature);
+    }
+
     if (is_beam_search()) {
-        OPENVINO_ASSERT(no_repeat_ngram_size > 0, "no_repeat_ngram_size must be positive");
+        OPENVINO_ASSERT(num_beams % num_beam_groups == 0, "'num_beams' (", num_beams, ") should be divisible by 'num_beam_groups' (", num_beam_groups, ")");
+        OPENVINO_ASSERT(num_beams >= num_return_sequences, "'num_beams' (", num_beams, ") must be greater than or equal to 'num_return_sequences' (", num_return_sequences, ")");
+
+        OPENVINO_ASSERT(!do_sample,
+                        "Beam search with sampling is not supported yet. "
+                        "Please either set do_sample=false to use beam search "
+                        "or set num_beams=1 if you wish to use multinomial sampling.");
+
+        OPENVINO_ASSERT(no_repeat_ngram_size > 0, "'no_repeat_ngram_size' must be positive");
         if (num_beam_groups > 1) {
-            OPENVINO_ASSERT(diversity_penalty != 0.0f, "For grouped beam search 'diversity_penalty' should not be zero, it it fallbacks to non-grouped beam search");
+            OPENVINO_ASSERT(diversity_penalty != 0.0f, "For grouped beam search 'diversity_penalty' should not be zero, otherwise it falls back to non-grouped beam search");
+        } else {
+            OPENVINO_ASSERT(diversity_penalty == 0.0f, "For beam search 'diversity_penalty' is applicable only when grouped beam search is used, but got 'num_beam_groups' == 1");
         }
     } else {
-        OPENVINO_ASSERT(frequency_penalty >= -2.0f && frequency_penalty <= 2.0f, "frequence_penalty penalty must be a [-2; +2]");
-        OPENVINO_ASSERT(presence_penalty >= -2.0f && presence_penalty <= 2.0f, "presence_penalty penalty must be a [-2; +2]");
+        // parameters requiring beam search
+        OPENVINO_ASSERT(num_beam_groups == 1, "'num_beam_groups' is supported by beam search only and should be 1 otherwise, but got ", num_beam_groups);
+        OPENVINO_ASSERT(no_repeat_ngram_size == std::numeric_limits<size_t>::max(), "'no_repeat_ngram_size' is supported only by beam search, otherwise should be set to max of size_t, but got ", no_repeat_ngram_size);
+        OPENVINO_ASSERT(diversity_penalty == 0.0f, "'diversity_penalty' is set to ", diversity_penalty, " (default is 0.0f), which is supported only by beam search sampling");
+        OPENVINO_ASSERT(length_penalty == 1.0f, "'length_penalty' is set to ", length_penalty, " (default is 1.0f), which is supported only by beam search sampling");
     }
+
+    // assistant generation
+
     if (is_assisting_generation()) {
-        if (assistant_confidence_threshold != 0.f) {
-            OPENVINO_ASSERT(num_assistant_tokens == 0, "Parameters `assistant_confidence_threshold` and `num_assistant_tokens` are mutually exclusive in `GenerationConfig`");
-            OPENVINO_ASSERT(!is_prompt_lookup(), "Parameters `assistant_confidence_threshold` cannot be used while Prompt Lookup decoding");
-        } else {
-            OPENVINO_ASSERT(num_assistant_tokens > 0, "Parameters `assistant_confidence_threshold` and `num_assistant_tokens` are mutually exclusive in `GenerationConfig`");
-        };
+        OPENVINO_ASSERT(!is_beam_search() && num_return_sequences == 1, "Beam search and parallel sampling are not compatible with assistant generation");
+        OPENVINO_ASSERT(assistant_confidence_threshold == 0.0f || num_assistant_tokens == 0, "Parameters `assistant_confidence_threshold` and `num_assistant_tokens` are mutually exclusive in `GenerationConfig`");
+    }
+
+    if (num_assistant_tokens == 0) {
+        OPENVINO_ASSERT(max_ngram_size == 0, "'max_ngram_size' should be set to default value 0 when prompt lookup is disabled");
     }
 }
 
diff --git a/src/cpp/src/json_utils.hpp b/src/cpp/src/json_utils.hpp
index 13d792e9db..4a4bb001df 100644
--- a/src/cpp/src/json_utils.hpp
+++ b/src/cpp/src/json_utils.hpp
@@ -4,6 +4,9 @@
 
 #pragma once
 
+#include <vector>
+#include <set>
+
 #include <nlohmann/json.hpp>
 
 namespace ov {
@@ -40,6 +43,15 @@ void read_json_param(const nlohmann::json& data, const std::string& name, std::v
     }
 }
 
+template <typename V>
+void read_json_param(const nlohmann::json& data, const std::string& name, std::set<V>& param) {  // adds the elements of the JSON array data[name] to 'param'
+    if (data.contains(name) && data[name].is_array()) {  // a missing key or a non-array value leaves 'param' untouched
+        for (const auto& elem : data[name]) {  // bind by const reference: iterating by value would copy every JSON element
+            param.insert(elem.get<V>());
+        }
+    }
+}
+
 }  // namespace utils
 }  // namespace genai
 }  // namespace ov
diff --git a/src/cpp/src/llm_pipeline.cpp b/src/cpp/src/llm_pipeline.cpp
index 81f411020e..3e378e78cf 100644
--- a/src/cpp/src/llm_pipeline.cpp
+++ b/src/cpp/src/llm_pipeline.cpp
@@ -72,7 +72,6 @@ class StatefulLLMPipeline final : public LLMPipelineImplBase {
         const ov::AnyMap& config,
         const ov::genai::GenerationConfig& generation_config
     ) : LLMPipelineImplBase(tokenizer, generation_config), m_sampler(m_tokenizer) {
-        ov::Core core = utils::singleton_core();
         ov::CompiledModel compiled_model;
         auto [core_plugin_config, plugin_config] = ov::genai::utils::split_core_compile_config(config);
         utils::slice_matmul_stateful_model(model);
@@ -81,10 +80,10 @@ class StatefulLLMPipeline final : public LLMPipelineImplBase {
         if (auto filtered_plugin_config = extract_adapters_from_properties(plugin_config, &m_generation_config.adapters)) {
             m_generation_config.adapters->set_tensor_name_prefix("base_model.model.model.");
             m_adapter_controller = AdapterController(model, *m_generation_config.adapters, device);   // TODO: Make the prefix name configurable
-            compiled_model = core.compile_model(model, device, *filtered_plugin_config);
+            compiled_model = utils::singleton_core().compile_model(model, device, *filtered_plugin_config);
             m_model_runner = compiled_model.create_infer_request();
         } else {
-            compiled_model = core.compile_model(model, device, plugin_config);
+            compiled_model = utils::singleton_core().compile_model(model, device, plugin_config);
             m_model_runner = compiled_model.create_infer_request();
         }
         ov::genai::utils::print_compiled_model_properties(compiled_model, "Stateful LLM model");
diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi
index 8510a8389f..5d82fa89a3 100644
--- a/src/python/openvino_genai/py_openvino_genai.pyi
+++ b/src/python/openvino_genai/py_openvino_genai.pyi
@@ -367,16 +367,16 @@ class ContinuousBatchingPipeline:
     def __init__(self, models_path: os.PathLike, tokenizer: Tokenizer, scheduler_config: SchedulerConfig, device: str, properties: dict[str, typing.Any] = {}) -> None:
         ...
     @typing.overload
-    def add_request(self, request_id: int, input_ids: openvino._pyopenvino.Tensor, sampling_params: GenerationConfig) -> GenerationHandle:
+    def add_request(self, request_id: int, input_ids: openvino._pyopenvino.Tensor, generation_config: GenerationConfig) -> GenerationHandle:
         ...
     @typing.overload
-    def add_request(self, request_id: int, prompt: str, sampling_params: GenerationConfig) -> GenerationHandle:
+    def add_request(self, request_id: int, prompt: str, generation_config: GenerationConfig) -> GenerationHandle:
         ...
     @typing.overload
-    def generate(self, input_ids: list[openvino._pyopenvino.Tensor], sampling_params: list[GenerationConfig], streamer: typing.Callable[[str], bool] | StreamerBase | None = None) -> list[EncodedGenerationResult]:
+    def generate(self, input_ids: list[openvino._pyopenvino.Tensor], generation_config: list[GenerationConfig], streamer: typing.Callable[[str], bool] | StreamerBase | None = None) -> list[EncodedGenerationResult]:
         ...
     @typing.overload
-    def generate(self, prompts: list[str], sampling_params: list[GenerationConfig], streamer: typing.Callable[[str], bool] | StreamerBase | None = None) -> list[GenerationResult]:
+    def generate(self, prompts: list[str], generation_config: list[GenerationConfig], streamer: typing.Callable[[str], bool] | StreamerBase | None = None) -> list[GenerationResult]:
         ...
     def get_config(self) -> GenerationConfig:
         ...
@@ -609,11 +609,15 @@ class GenerationConfig:
         ...
     def is_greedy_decoding(self) -> bool:
         ...
+    def is_multinomial(self) -> bool:
+        ...
     def is_prompt_lookup(self) -> bool:
         ...
     def set_eos_token_id(self, tokenizer_eos_token_id: int) -> None:
         ...
-    def update_generation_config(self, config_map: dict[str, openvino._pyopenvino.OVAny]) -> None:
+    def update_generation_config(self, **kwargs) -> None:
+        ...
+    def validate(self) -> None:
         ...
 class GenerationFinishReason:
     """
@@ -826,7 +830,7 @@ class Image2ImagePipeline:
         ...
     def reshape(self, num_images_per_prompt: int, height: int, width: int, guidance_scale: float) -> None:
         ...
-    def set_generation_config(self, generation_config: ImageGenerationConfig) -> None:
+    def set_generation_config(self, config: ImageGenerationConfig) -> None:
         ...
     def set_scheduler(self, scheduler: Scheduler) -> None:
         ...
@@ -927,7 +931,7 @@ class InpaintingPipeline:
         ...
     def reshape(self, num_images_per_prompt: int, height: int, width: int, guidance_scale: float) -> None:
         ...
-    def set_generation_config(self, generation_config: ImageGenerationConfig) -> None:
+    def set_generation_config(self, config: ImageGenerationConfig) -> None:
         ...
     def set_scheduler(self, scheduler: Scheduler) -> None:
         ...
@@ -1615,7 +1619,7 @@ class Text2ImagePipeline:
         ...
     def reshape(self, num_images_per_prompt: int, height: int, width: int, guidance_scale: float) -> None:
         ...
-    def set_generation_config(self, generation_config: ImageGenerationConfig) -> None:
+    def set_generation_config(self, config: ImageGenerationConfig) -> None:
         ...
     def set_scheduler(self, scheduler: Scheduler) -> None:
         ...
@@ -1865,9 +1869,9 @@ class VLMPipeline:
         ...
     def get_tokenizer(self) -> Tokenizer:
         ...
-    def set_chat_template(self, new_template: str) -> None:
+    def set_chat_template(self, chat_template: str) -> None:
         ...
-    def set_generation_config(self, new_config: GenerationConfig) -> None:
+    def set_generation_config(self, config: GenerationConfig) -> None:
         ...
     def start_chat(self, system_message: str = '') -> None:
         ...
@@ -2043,6 +2047,8 @@ class WhisperGenerationConfig:
         ...
     def set_eos_token_id(self, tokenizer_eos_token_id: int) -> None:
         ...
+    def update_generation_config(self, **kwargs) -> None:
+        ...
 class WhisperPerfMetrics(PerfMetrics):
     """
     
diff --git a/src/python/py_continuous_batching_pipeline.cpp b/src/python/py_continuous_batching_pipeline.cpp
index be7a72481f..2b48e4d44d 100644
--- a/src/python/py_continuous_batching_pipeline.cpp
+++ b/src/python/py_continuous_batching_pipeline.cpp
@@ -235,22 +235,22 @@ void init_continuous_batching_pipeline(py::module_& m) {
         .def("get_tokenizer", &ContinuousBatchingPipeline::get_tokenizer)
         .def("get_config", &ContinuousBatchingPipeline::get_config)
         .def("get_metrics", &ContinuousBatchingPipeline::get_metrics)
-        .def("add_request", py::overload_cast<uint64_t, const ov::Tensor&, const ov::genai::GenerationConfig&>(&ContinuousBatchingPipeline::add_request), py::arg("request_id"), py::arg("input_ids"), py::arg("sampling_params"))
-        .def("add_request", py::overload_cast<uint64_t, const std::string&, const ov::genai::GenerationConfig&>(&ContinuousBatchingPipeline::add_request), py::arg("request_id"), py::arg("prompt"), py::arg("sampling_params"))
+        .def("add_request", py::overload_cast<uint64_t, const ov::Tensor&, const ov::genai::GenerationConfig&>(&ContinuousBatchingPipeline::add_request), py::arg("request_id"), py::arg("input_ids"), py::arg("generation_config"))
+        .def("add_request", py::overload_cast<uint64_t, const std::string&, const ov::genai::GenerationConfig&>(&ContinuousBatchingPipeline::add_request), py::arg("request_id"), py::arg("prompt"), py::arg("generation_config"))
         .def("step", &ContinuousBatchingPipeline::step)
         .def("has_non_finished_requests", &ContinuousBatchingPipeline::has_non_finished_requests)
         .def(
             "generate",
             py::overload_cast<const std::vector<ov::Tensor>&, const std::vector<ov::genai::GenerationConfig>&, const ov::genai::StreamerVariant&>(&ContinuousBatchingPipeline::generate),
             py::arg("input_ids"),
-            py::arg("sampling_params"),
+            py::arg("generation_config"),
             py::arg("streamer") = std::monostate{}
         )
         .def(
             "generate",
             py::overload_cast<const std::vector<std::string>&, const std::vector<ov::genai::GenerationConfig>&, const ov::genai::StreamerVariant&>(&ContinuousBatchingPipeline::generate),
             py::arg("prompts"),
-            py::arg("sampling_params"),
+            py::arg("generation_config"),
             py::arg("streamer") = std::monostate{}
         );
 }
diff --git a/src/python/py_generation_config.cpp b/src/python/py_generation_config.cpp
index f49bcf29bd..a97a43fc5c 100644
--- a/src/python/py_generation_config.cpp
+++ b/src/python/py_generation_config.cpp
@@ -118,7 +118,13 @@ void init_generation_config(py::module_& m) {
         .def("set_eos_token_id", &GenerationConfig::set_eos_token_id, py::arg("tokenizer_eos_token_id"))
         .def("is_beam_search", &GenerationConfig::is_beam_search)
         .def("is_greedy_decoding", &GenerationConfig::is_greedy_decoding)
+        .def("is_multinomial", &GenerationConfig::is_multinomial)
         .def("is_assisting_generation", &GenerationConfig::is_assisting_generation)
         .def("is_prompt_lookup", &GenerationConfig::is_prompt_lookup)
-        .def("update_generation_config", static_cast<void (GenerationConfig::*)(const ov::AnyMap&)>(&ov::genai::GenerationConfig::update_generation_config), py::arg("config_map"));
+        .def("validate", &GenerationConfig::validate)
+        .def("update_generation_config", [](
+            ov::genai::GenerationConfig& config,
+            const py::kwargs& kwargs) {
+            config.update_generation_config(pyutils::kwargs_to_any_map(kwargs));
+        });
    }
diff --git a/src/python/py_image_generation_pipelines.cpp b/src/python/py_image_generation_pipelines.cpp
index 311f3f3760..c246557a97 100644
--- a/src/python/py_image_generation_pipelines.cpp
+++ b/src/python/py_image_generation_pipelines.cpp
@@ -224,7 +224,7 @@ void init_image_generation_pipelines(py::module_& m) {
         .def_readwrite("max_sequence_length", &ov::genai::ImageGenerationConfig::max_sequence_length)
         .def("validate", &ov::genai::ImageGenerationConfig::validate)
         .def("update_generation_config", [](
-            ov::genai::ImageGenerationConfig config,
+            ov::genai::ImageGenerationConfig& config,
             const py::kwargs& kwargs) {
             config.update_generation_config(pyutils::kwargs_to_any_map(kwargs));
         });
@@ -255,8 +255,8 @@ void init_image_generation_pipelines(py::module_& m) {
             device (str): Device to run the model on (e.g., CPU, GPU).
             kwargs: Text2ImagePipeline properties
         )")
-        .def("get_generation_config", &ov::genai::Text2ImagePipeline::get_generation_config)
-        .def("set_generation_config", &ov::genai::Text2ImagePipeline::set_generation_config, py::arg("generation_config"))
+        .def("get_generation_config", &ov::genai::Text2ImagePipeline::get_generation_config, py::return_value_policy::copy)
+        .def("set_generation_config", &ov::genai::Text2ImagePipeline::set_generation_config, py::arg("config"))
         .def("set_scheduler", &ov::genai::Text2ImagePipeline::set_scheduler, py::arg("scheduler"))
         .def("reshape", &ov::genai::Text2ImagePipeline::reshape, py::arg("num_images_per_prompt"), py::arg("height"), py::arg("width"), py::arg("guidance_scale"))
         .def_static("stable_diffusion", &ov::genai::Text2ImagePipeline::stable_diffusion, py::arg("scheduler"), py::arg("clip_text_model"), py::arg("unet"), py::arg("vae"))
@@ -323,8 +323,8 @@ void init_image_generation_pipelines(py::module_& m) {
             device (str): Device to run the model on (e.g., CPU, GPU).
             kwargs: Image2ImagePipeline properties
         )")
-        .def("get_generation_config", &ov::genai::Image2ImagePipeline::get_generation_config)
-        .def("set_generation_config", &ov::genai::Image2ImagePipeline::set_generation_config, py::arg("generation_config"))
+        .def("get_generation_config", &ov::genai::Image2ImagePipeline::get_generation_config, py::return_value_policy::copy)
+        .def("set_generation_config", &ov::genai::Image2ImagePipeline::set_generation_config, py::arg("config"))
         .def("set_scheduler", &ov::genai::Image2ImagePipeline::set_scheduler, py::arg("scheduler"))
         .def("reshape", &ov::genai::Image2ImagePipeline::reshape, py::arg("num_images_per_prompt"), py::arg("height"), py::arg("width"), py::arg("guidance_scale"))
         .def_static("stable_diffusion", &ov::genai::Image2ImagePipeline::stable_diffusion, py::arg("scheduler"), py::arg("clip_text_model"), py::arg("unet"), py::arg("vae"))
@@ -386,8 +386,8 @@ void init_image_generation_pipelines(py::module_& m) {
             device (str): Device to run the model on (e.g., CPU, GPU).
             kwargs: InpaintingPipeline properties
         )")
-        .def("get_generation_config", &ov::genai::InpaintingPipeline::get_generation_config)
-        .def("set_generation_config", &ov::genai::InpaintingPipeline::set_generation_config, py::arg("generation_config"))
+        .def("get_generation_config", &ov::genai::InpaintingPipeline::get_generation_config, py::return_value_policy::copy)
+        .def("set_generation_config", &ov::genai::InpaintingPipeline::set_generation_config, py::arg("config"))
         .def("set_scheduler", &ov::genai::InpaintingPipeline::set_scheduler, py::arg("scheduler"))
         .def("reshape", &ov::genai::InpaintingPipeline::reshape, py::arg("num_images_per_prompt"), py::arg("height"), py::arg("width"), py::arg("guidance_scale"))
         .def_static("stable_diffusion", &ov::genai::InpaintingPipeline::stable_diffusion, py::arg("scheduler"), py::arg("clip_text_model"), py::arg("unet"), py::arg("vae"))
diff --git a/src/python/py_llm_pipeline.cpp b/src/python/py_llm_pipeline.cpp
index b1d5136253..7360975a0b 100644
--- a/src/python/py_llm_pipeline.cpp
+++ b/src/python/py_llm_pipeline.cpp
@@ -53,15 +53,10 @@ py::object call_common_generate(
     const pyutils::PyBindStreamerVariant& py_streamer,
     const py::kwargs& kwargs
 ) {
-    ov::genai::GenerationConfig default_config;
-    if (config.has_value()) {
-        default_config = *config;
-    } else {
-        default_config = pipe.get_generation_config();
-    }
+    ov::genai::GenerationConfig default_config = config.has_value() ? *config : pipe.get_generation_config();
     auto updated_config = pyutils::update_config_from_kwargs(default_config, kwargs);
+
     py::object results;
-    EncodedInputs tensor_data;
     StreamerVariant streamer = pyutils::pystreamer_to_streamer(py_streamer);
 
     // Call suitable generate overload for each type of input.
diff --git a/src/python/py_utils.cpp b/src/python/py_utils.cpp
index 45a0c46174..34522409ea 100644
--- a/src/python/py_utils.cpp
+++ b/src/python/py_utils.cpp
@@ -358,7 +358,10 @@ ov::genai::OptionalGenerationConfig update_config_from_kwargs(const ov::genai::O
     ov::genai::GenerationConfig res_config;
     if(config.has_value())
         res_config = *config;
-    res_config.update_generation_config(kwargs_to_any_map(kwargs));
+
+    if (!kwargs.empty())
+        res_config.update_generation_config(kwargs_to_any_map(kwargs));
+
     return res_config;
 }
 
diff --git a/src/python/py_vlm_pipeline.cpp b/src/python/py_vlm_pipeline.cpp
index 340cb3da62..b0cfa0a42a 100644
--- a/src/python/py_vlm_pipeline.cpp
+++ b/src/python/py_vlm_pipeline.cpp
@@ -150,10 +150,10 @@ void init_vlm_pipeline(py::module_& m) {
 
         .def("start_chat", &ov::genai::VLMPipeline::start_chat, py::arg("system_message") = "")
         .def("finish_chat", &ov::genai::VLMPipeline::finish_chat)
-        .def("set_chat_template", &ov::genai::VLMPipeline::set_chat_template, py::arg("new_template"))
+        .def("set_chat_template", &ov::genai::VLMPipeline::set_chat_template, py::arg("chat_template"))
         .def("get_tokenizer", &ov::genai::VLMPipeline::get_tokenizer)
-        .def("get_generation_config", &ov::genai::VLMPipeline::get_generation_config)
-        .def("set_generation_config", &ov::genai::VLMPipeline::set_generation_config, py::arg("new_config"))
+        .def("get_generation_config", &ov::genai::VLMPipeline::get_generation_config, py::return_value_policy::copy)
+        .def("set_generation_config", &ov::genai::VLMPipeline::set_generation_config, py::arg("config"))
         .def(
             "generate",
             [](ov::genai::VLMPipeline& pipe,
diff --git a/src/python/py_whisper_pipeline.cpp b/src/python/py_whisper_pipeline.cpp
index cd42dcf58d..d290612ed6 100644
--- a/src/python/py_whisper_pipeline.cpp
+++ b/src/python/py_whisper_pipeline.cpp
@@ -187,7 +187,10 @@ OptionalWhisperGenerationConfig update_whisper_config_from_kwargs(const Optional
     WhisperGenerationConfig res_config;
     if (config.has_value())
         res_config = *config;
-    res_config.update_generation_config(pyutils::kwargs_to_any_map(kwargs));
+
+    if (!kwargs.empty())
+        res_config.update_generation_config(pyutils::kwargs_to_any_map(kwargs));
+
     return res_config;
 }
 
@@ -295,7 +298,12 @@ void init_whisper_pipeline(py::module_& m) {
         .def_readwrite("return_timestamps", &WhisperGenerationConfig::return_timestamps)
         .def_readwrite("initial_prompt", &WhisperGenerationConfig::initial_prompt)
         .def_readwrite("hotwords", &WhisperGenerationConfig::hotwords)
-        .def("set_eos_token_id", &WhisperGenerationConfig::set_eos_token_id, py::arg("tokenizer_eos_token_id"));
+        .def("set_eos_token_id", &WhisperGenerationConfig::set_eos_token_id, py::arg("tokenizer_eos_token_id"))
+        .def("update_generation_config", [](
+            ov::genai::WhisperGenerationConfig& config,  // by reference: the update must reach the caller's object, not a copy
+            const py::kwargs& kwargs) {
+            config.update_generation_config(pyutils::kwargs_to_any_map(kwargs));
+        });
 
     py::class_<WhisperRawPerfMetrics>(m, "WhisperRawPerfMetrics", raw_perf_metrics_docstring)
         .def(py::init<>())
diff --git a/tests/cpp/CMakeLists.txt b/tests/cpp/CMakeLists.txt
index 093cd993de..b8c2e625c5 100644
--- a/tests/cpp/CMakeLists.txt
+++ b/tests/cpp/CMakeLists.txt
@@ -25,8 +25,8 @@ file(GLOB src_files "${OpenVINOGenAI_SOURCE_DIR}/src/cpp/src/sequence_group.cpp"
                     "${OpenVINOGenAI_SOURCE_DIR}/src/cpp/src/continuous_batching*.cpp"
                     "${OpenVINOGenAI_SOURCE_DIR}/src/cpp/src/text_callback_streamer.cpp")
 
-add_executable(${TEST_TARGET_NAME} ${tests_src}
-        block_allocator.cpp)
+add_executable(${TEST_TARGET_NAME} ${tests_src})
+
 target_link_libraries(${TEST_TARGET_NAME} PRIVATE openvino::genai gtest_main)
 target_include_directories(${TEST_TARGET_NAME} PRIVATE "${OpenVINOGenAI_SOURCE_DIR}/src/cpp/src")
 target_sources(${TEST_TARGET_NAME} PRIVATE ${src_files})
diff --git a/tests/cpp/generate_config.cpp b/tests/cpp/generate_config.cpp
deleted file mode 100644
index 974fd499f8..0000000000
--- a/tests/cpp/generate_config.cpp
+++ /dev/null
@@ -1,143 +0,0 @@
-// Copyright (C) 2024 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-
-#include <gtest/gtest.h>
-#include <openvino/core/except.hpp>
-#include "openvino/genai/generation_config.hpp"
-
-
-using namespace ov::genai;
-
-TEST(GenerationConfigTest, invalid_temperature) {
-    GenerationConfig config;
-    config.max_new_tokens = 20;
-    config.temperature = -0.1;
-    config.do_sample = true;
-    EXPECT_THROW(config.validate(), ov::Exception);
-}
-
-TEST(GenerationConfigTest, valid_temperature) {
-    GenerationConfig config;
-    config.max_new_tokens = 20;
-    config.do_sample = true;
-    config.temperature = 0.1;
-    EXPECT_NO_THROW(config.validate());
-}
-
-TEST(GenerationConfigTest, invalid_top_p) {
-    GenerationConfig config;
-    config.max_new_tokens = 20;
-    config.do_sample = true;
-    config.top_p = -0.5;
-    EXPECT_THROW(config.validate(), ov::Exception);
-    config.top_p = 1.1;
-    EXPECT_THROW(config.validate(), ov::Exception);
-}
-
-TEST(GenerationConfigTest, valid_top_p) {
-    GenerationConfig config;
-    config.max_new_tokens = 20;
-    config.do_sample = true;
-    config.top_p = 0.1;
-    EXPECT_NO_THROW(config.validate());
-}
-
-TEST(GenerationConfigTest, invalid_repeatition_penalty) {
-    GenerationConfig config;
-    config.max_new_tokens = 20;
-    config.do_sample = true;
-    config.repetition_penalty = -3.0;
-    EXPECT_THROW(config.validate(), ov::Exception);
-    config.repetition_penalty = -0.1;
-    EXPECT_THROW(config.validate(), ov::Exception);
-}
-
-TEST(GenerationConfigTest, valid_repeatition_penalty) {
-    GenerationConfig config;
-    config.max_new_tokens = 20;
-    config.do_sample = true;
-    config.repetition_penalty = 1.8;
-    EXPECT_NO_THROW(config.validate());
-    config.repetition_penalty = 0.1;
-    EXPECT_NO_THROW(config.validate());
-}
-
-TEST(GenerationConfigTest, invalid_presence_penalty) {
-    GenerationConfig config;
-    config.max_new_tokens = 20;
-    config.do_sample = true;
-    config.presence_penalty = 3.0;
-    EXPECT_THROW(config.validate(), ov::Exception);
-    config.presence_penalty = -3.1;
-    EXPECT_THROW(config.validate(), ov::Exception);
-}
-
-TEST(GenerationConfigTest, valid_presence_penalty) {
-    GenerationConfig config;
-    config.max_new_tokens = 20;
-    config.do_sample = true;
-    config.presence_penalty = 1.8;
-    EXPECT_NO_THROW(config.validate());
-    config.presence_penalty = -2.0;
-    EXPECT_NO_THROW(config.validate());
-}
-
-TEST(GenerationConfigTest, invalid_frequency_penalty) {
-    GenerationConfig config;
-    config.max_new_tokens = 20;
-    config.do_sample = true;
-    config.frequency_penalty = 3.0;
-    EXPECT_THROW(config.validate(), ov::Exception);
-    config.frequency_penalty = -3.1;
-    EXPECT_THROW(config.validate(), ov::Exception);
-}
-
-TEST(GenerationConfigTest, valid_frequency_penalty) {
-    GenerationConfig config;
-    config.max_new_tokens = 20;
-    config.do_sample = true;
-    config.frequency_penalty = 1.8;
-    EXPECT_NO_THROW(config.validate());
-    config.frequency_penalty = -2.0;
-    EXPECT_NO_THROW(config.validate());
-}
-
-ov::genai::GenerationConfig speculative_decoding_multinomial() {
-    auto speculative_decoding_multinomial_config = ov::genai::multinomial();
-    speculative_decoding_multinomial_config.num_assistant_tokens = 5;
-    return speculative_decoding_multinomial_config;
-}
-
-ov::genai::GenerationConfig speculative_decoding_greedy() {
-    auto speculative_decoding_greedy_config = ov::genai::greedy();
-    speculative_decoding_greedy_config.assistant_confidence_threshold = 0.4f;
-    return speculative_decoding_greedy_config;
-}
-
-TEST(GenerationConfigTest, invalid_static_spec_decoding) {
-    GenerationConfig config = speculative_decoding_greedy();
-    config.num_assistant_tokens = 5;
-    config.assistant_confidence_threshold = 0.2;
-    EXPECT_THROW(config.validate(), ov::Exception);
-}
-
-TEST(GenerationConfigTest, valid_static_spec_decoding) {
-    GenerationConfig config = speculative_decoding_greedy();
-    config.num_assistant_tokens = 5;
-    config.assistant_confidence_threshold = 0;
-    EXPECT_NO_THROW(config.validate());
-}
-
-TEST(GenerationConfigTest, invalid_dynamic_spec_decoding) {
-    GenerationConfig config = speculative_decoding_greedy();
-    config.num_assistant_tokens = 5;
-    config.assistant_confidence_threshold = 0.5;
-    EXPECT_THROW(config.validate(), ov::Exception);
-}
-
-TEST(GenerationConfigTest, valid_dynamic_spec_decoding) {
-    GenerationConfig config = speculative_decoding_greedy();
-    config.assistant_confidence_threshold = 0.5;
-    config.num_assistant_tokens = 0;
-    EXPECT_NO_THROW(config.validate());
-}
diff --git a/tests/python_tests/common.py b/tests/python_tests/common.py
index f940d272ed..9040fa435f 100644
--- a/tests/python_tests/common.py
+++ b/tests/python_tests/common.py
@@ -73,6 +73,7 @@ def get_beam_search() -> GenerationConfig:
     generation_config = GenerationConfig()
     generation_config.num_beam_groups = 3
     generation_config.num_beams = 6
+    generation_config.diversity_penalty = 1
     generation_config.max_new_tokens = 30
     generation_config.num_return_sequences = 3
     generation_config.num_return_sequences = generation_config.num_beams
@@ -82,6 +83,7 @@ def get_beam_search_min_and_max_tokens() -> GenerationConfig:
     generation_config = GenerationConfig()
     generation_config.num_beam_groups = 3
     generation_config.num_beams = 6
+    generation_config.diversity_penalty = 1
     generation_config.min_new_tokens = 15
     generation_config.max_new_tokens = 30
     generation_config.num_return_sequences = 3
@@ -92,6 +94,7 @@ def get_beam_search_with_single_stop_string() -> GenerationConfig:
     generation_config = GenerationConfig()
     generation_config.num_beam_groups = 3
     generation_config.num_beams = 6
+    generation_config.diversity_penalty = 1
     generation_config.max_new_tokens = 50
     generation_config.num_return_sequences = generation_config.num_beams
     generation_config.stop_strings = {"open sour"}  # expected match on "open source"
@@ -102,6 +105,7 @@ def get_beam_search_with_multiple_stop_strings() -> GenerationConfig:
     generation_config = GenerationConfig()
     generation_config.num_beam_groups = 3
     generation_config.num_beams = 6
+    generation_config.diversity_penalty = 1
     generation_config.max_new_tokens = 50
     generation_config.num_return_sequences = generation_config.num_beams
     generation_config.stop_strings = {".", "software", "Intel"}
@@ -112,6 +116,7 @@ def get_beam_search_with_multiple_stop_strings_no_match() -> GenerationConfig:
     generation_config = GenerationConfig()
     generation_config.num_beam_groups = 3
     generation_config.num_beams = 6
+    generation_config.diversity_penalty = 1
     generation_config.max_new_tokens = 30
     generation_config.num_return_sequences = generation_config.num_beams
     generation_config.stop_strings = {"Einstein", "sunny", "geothermal"}
@@ -299,7 +304,7 @@ def convert_to_hf(
     kwargs['pad_token_id'] = default_generation_config.pad_token_id
     kwargs['repetition_penalty'] = generation_config.repetition_penalty
 
-    if generation_config.num_beams > 1:
+    if generation_config.is_beam_search():
         # beam search case
         kwargs['num_beam_groups'] = generation_config.num_beam_groups
         kwargs['num_beams'] = generation_config.num_beams
@@ -309,7 +314,7 @@ def convert_to_hf(
         kwargs['output_scores'] = True
         if generation_config.num_beam_groups > 1:
             kwargs['diversity_penalty'] = generation_config.diversity_penalty
-    elif generation_config.do_sample:
+    elif generation_config.is_multinomial():
         # mulitinomial
         kwargs['temperature'] = generation_config.temperature
         kwargs['top_k'] = generation_config.top_k
diff --git a/tests/python_tests/ov_genai_test_utils.py b/tests/python_tests/ov_genai_test_utils.py
index 3fc89cb8a7..9e8e4681f9 100644
--- a/tests/python_tests/ov_genai_test_utils.py
+++ b/tests/python_tests/ov_genai_test_utils.py
@@ -111,7 +111,7 @@ def read_model(params, **tokenizer_kwargs):
         path,
         hf_tokenizer,
         opt_model,
-        ov_genai.LLMPipeline(path, 'CPU', **{'ENABLE_MMAP': False}),
+        ov_genai.LLMPipeline(path, 'CPU', ENABLE_MMAP=False),
     )
 
 
@@ -139,7 +139,7 @@ def model_tmp_path(tmpdir_factory):
 
 
 @pytest.fixture(scope="module")
-def model_tokenizers_path_tmp_path(tmpdir_factory):
+def model_tokenizers_tmp_path(tmpdir_factory):
     model_id, path, _, _, _ = read_model(get_models_list()[0])
     temp_path = tmpdir_factory.mktemp(model_id.replace('/', '_'))
 
@@ -180,10 +180,15 @@ def load_genai_pipe_with_configs(configs: List[Tuple], temp_path):
     for config_json, config_name in configs:
         with (temp_path / config_name).open('w') as f:
             json.dump(config_json, f)
-    return ov_genai.LLMPipeline(temp_path, 'CPU')
+
+    ov_pipe = ov_genai.LLMPipeline(temp_path, 'CPU')
+
+    for _, config_name in configs:
+        os.remove(temp_path / config_name)
+
+    return ov_pipe
 
 
 @functools.lru_cache(1)
 def get_continuous_batching(path):
-    scheduler_config = ov_genai.SchedulerConfig()
-    return ov_genai.LLMPipeline(path, ov_genai.Tokenizer(path), 'CPU', **{"scheduler_config": scheduler_config})
+    return ov_genai.LLMPipeline(path, 'CPU', scheduler_config=ov_genai.SchedulerConfig())
diff --git a/tests/python_tests/test_continuous_batching.py b/tests/python_tests/test_continuous_batching.py
index 3a1e9fa092..01762bf9e3 100644
--- a/tests/python_tests/test_continuous_batching.py
+++ b/tests/python_tests/test_continuous_batching.py
@@ -105,7 +105,7 @@ def test_cb_streamer_vs_return_vs_stateful(prompt):
 
 generation_configs = [
     dict(do_sample=False, max_new_tokens=20),
-    dict(do_sample=False, num_beam_groups=3, num_beams=15, num_return_sequences=1, max_new_tokens=10, diversity_penalty=1.0)
+    dict(do_sample=False, num_beam_groups=3, num_beams=15, num_return_sequences=1, max_new_tokens=10, diversity_penalty=1.0, repetition_penalty=1.0)
 ]
 questions = [
     '1+1=',
@@ -113,19 +113,22 @@ def test_cb_streamer_vs_return_vs_stateful(prompt):
     'Why is the Sun yellow?',
     'What was my first question?'
 ]
-@pytest.mark.parametrize("generation_config", generation_configs[1:])
+@pytest.mark.parametrize("generation_config_kwargs", generation_configs[1:])
 @pytest.mark.parametrize("model_descr", get_chat_models_list())
 @pytest.mark.precommit
-def test_chat_scenario_vs_stateful(model_descr, generation_config: Dict):
+def test_chat_scenario_vs_stateful(model_descr, generation_config_kwargs: Dict):
     model_id, path, hf_tokenizer, opt_model, ov_pipe = read_model((model_descr[0], model_descr[1] / '_test_chat'))
     cb_pipe = get_continuous_batching(path)
 
     ov_pipe.start_chat()
     cb_pipe.start_chat()
 
+    generation_config = GenerationConfig(**generation_config_kwargs)
+    ov_pipe.set_generation_config(generation_config)
+
     for question in questions:
-        generated = cb_pipe.generate(question, **generation_config)
-        reference = ov_pipe.generate(question, **generation_config)
+        generated = cb_pipe.generate(question, generation_config=generation_config)
+        reference = ov_pipe.generate(question)
         assert generated == reference
 
     # Test that finish_chat() doesn't fail just in case.
@@ -168,9 +171,13 @@ def test_post_oom_health(tmp_path, sampling_config):
 # Pre-emption
 #
 
-def get_greedy_seq_len_300() -> GenerationConfig:
+def get_parallel_sampling_seq_len_300() -> GenerationConfig:
     generation_config = GenerationConfig()
-    generation_config.num_return_sequences = 3
+    # TODO: add generation_config.generator and restore the parameters below
+    # generation_config.num_return_sequences = 3
+    # generation_config.do_sample = True
+    # generation_config.top_k = 10
+    # generation_config.top_p = 0.5
     generation_config.max_new_tokens = 300
     return generation_config
 
@@ -178,14 +185,15 @@ def get_beam_search_seq_len_300() -> GenerationConfig:
     generation_config = GenerationConfig()
     generation_config.num_beam_groups = 3
     generation_config.num_beams = 6
+    generation_config.diversity_penalty = 1
     generation_config.max_new_tokens = 300
     generation_config.num_return_sequences = generation_config.num_beams
     return generation_config
 
 scheduler_params_list = [({"num_kv_blocks": 2, "dynamic_split_fuse": True, "max_num_batched_tokens": 256, "max_num_seqs": 256}, get_greedy()),
                          ({"num_kv_blocks": 2, "dynamic_split_fuse": False, "max_num_batched_tokens": 256, "max_num_seqs": 256}, get_greedy()),
-                         ({"num_kv_blocks": 10, "dynamic_split_fuse": True}, get_greedy_seq_len_300()),
-                         ({"num_kv_blocks": 10, "dynamic_split_fuse": False}, get_greedy_seq_len_300()),
+                         ({"num_kv_blocks": 10, "dynamic_split_fuse": True}, get_parallel_sampling_seq_len_300()),
+                         ({"num_kv_blocks": 10, "dynamic_split_fuse": False}, get_parallel_sampling_seq_len_300()),
                          ({"num_kv_blocks": 34, "dynamic_split_fuse": True, "max_num_batched_tokens": 256, "max_num_seqs": 256}, get_beam_search()),
                          ({"num_kv_blocks": 34, "dynamic_split_fuse": False, "max_num_batched_tokens": 256, "max_num_seqs": 256}, get_beam_search()),
                          ({"num_kv_blocks": 100, "dynamic_split_fuse": True}, get_beam_search_seq_len_300()),
diff --git a/tests/python_tests/test_generation_config.py b/tests/python_tests/test_generation_config.py
new file mode 100644
index 0000000000..110caaf0e5
--- /dev/null
+++ b/tests/python_tests/test_generation_config.py
@@ -0,0 +1,142 @@
+# Copyright (C) 2023-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+from openvino_genai import GenerationConfig
+from typing import Tuple, List
+import json
+import os
+import pytest
+
+configs = [
+    # stop conditions
+    dict(max_new_tokens=12),
+    dict(max_length=12),
+    dict(stop_token_ids={2}),
+    dict(eos_token_id=1, stop_token_ids={1}),
+    dict(stop_strings={"a", "b"}),
+    dict(ignore_eos=True, max_new_tokens=10),
+    dict(ignore_eos=True, max_length=10),
+    dict(max_new_tokens=0, echo=True),
+    dict(min_new_tokens=1, max_new_tokens=1),
+    # multinomial
+    dict(max_new_tokens=1, do_sample=True, num_return_sequences=2),
+    dict(max_new_tokens=1, do_sample=True, top_k=1),
+    dict(max_new_tokens=1, do_sample=True, top_p=0.5),
+    dict(max_new_tokens=1, do_sample=True, temperature=0.5),
+    # beam search
+    dict(max_new_tokens=1, num_beams=2),
+    dict(max_new_tokens=1, num_beams=2, num_return_sequences=1),
+    dict(max_new_tokens=1, num_beams=2, num_return_sequences=2),
+    dict(max_new_tokens=1, num_beams=4, num_beam_groups=2, diversity_penalty=1.0),
+    dict(max_new_tokens=1, num_beams=4, length_penalty=1.0),
+    dict(max_new_tokens=1, num_beams=4, no_repeat_ngram_size=2),
+    # assistant generation
+    dict(max_new_tokens=1, assistant_confidence_threshold=0.5),
+    dict(max_new_tokens=1, num_assistant_tokens=2),
+    dict(max_new_tokens=1, num_assistant_tokens=2, max_ngram_size=2), # prompt lookup
+]
+@pytest.mark.parametrize("generation_config_kwargs", configs)
+@pytest.mark.precommit
+@pytest.mark.nightly
+def test_valid_configs(generation_config_kwargs):
+    config = GenerationConfig(**generation_config_kwargs)
+    config.validate()
+
+    config = GenerationConfig()
+    config.update_generation_config(**generation_config_kwargs)
+    config.validate()
+
+
+invalid_configs = [
+    dict(num_return_sequences=0), # no reason to run with empty output
+    dict(num_return_sequences=2), # beam search or multinomial is required
+    # stop conditions
+    dict(), # no stop conditions at all
+    dict(eos_token_id=1), # 'stop_token_ids' does not contain 'eos_token_id'
+    dict(eos_token_id=1, stop_token_ids={2}), # 'stop_token_ids' is not empty, but does not contain 'eos_token_id'
+    dict(ignore_eos=True),  # no 'max_new_tokens', no 'max_length' with 'ignore_eos'
+    dict(stop_token_ids={-1}), # value in 'stop_token_ids' must be non-negative
+    dict(max_new_tokens=0), # 'max_new_tokens' can be 0 only when 'echo' is True
+    dict(max_new_tokens=10, min_new_tokens=20), # 'max_new_tokens' must be >= 'min_new_tokens'
+    # penalties
+    dict(max_new_tokens=1, repetition_penalty=-1.0), # invalid repetition_penalty
+    dict(max_new_tokens=1, presence_penalty=-3.0), # invalid presence_penalty
+    dict(max_new_tokens=1, frequency_penalty=3.0), # invalid frequency_penalty
+    # multinomial sampling
+    dict(max_new_tokens=1, do_sample=True, top_p=1.1), # 'top_p' must be within (0, 1] when 'do_sample' is True
+    dict(max_new_tokens=1, do_sample=True, top_p=0), # 'top_p' must be within (0, 1] when 'do_sample' is True
+    dict(max_new_tokens=1, do_sample=True, temperature=-1.0), # invalid temp
+    # parameters requiring multinomial
+    dict(max_new_tokens=1, top_k=1), # requires do_sample=True
+    dict(max_new_tokens=1, top_p=0.5), # requires do_sample=True
+    dict(max_new_tokens=1, temperature=2.0), # requires do_sample=True
+    # beam search
+    dict(max_new_tokens=1, num_beams=2, num_return_sequences=3), # 'num_beams' must be >= 'num_return_sequences'
+    dict(max_new_tokens=1, num_beams=3, num_beam_groups=2), # 'num_beams' must be divisible by 'num_beam_groups'
+    dict(max_new_tokens=1, num_beams=3, do_sample=True), # beam sample is not supported
+    dict(max_new_tokens=1, num_beams=3, no_repeat_ngram_size=0), # invalid 'no_repeat_ngram_size'
+    dict(max_new_tokens=1, num_beams=4, num_beam_groups=2, diversity_penalty=0.0), # 'diversity_penalty' should not be a default value
+    dict(max_new_tokens=1, num_beams=4, diversity_penalty=1.0), # 'diversity_penalty' is used only for grouped beam search
+    dict(max_new_tokens=1, num_beams=2, frequency_penalty=1.0), # 'frequency_penalty' is not supported by beam search
+    dict(max_new_tokens=1, num_beams=2, presence_penalty=1.0), # 'presence_penalty' is not supported by beam search
+    dict(max_new_tokens=1, num_beams=2, repetition_penalty=0.0), # 'repetition_penalty' is not supported by beam search
+    # parameters requiring beam search
+    dict(max_new_tokens=1, num_beam_groups=2), # requiring beam search
+    dict(max_new_tokens=1, no_repeat_ngram_size=2), # requiring beam search
+    dict(max_new_tokens=1, diversity_penalty=1.0), # requiring beam search
+    dict(max_new_tokens=1, length_penalty=2), # requiring beam search
+    # assistant generation
+    dict(max_new_tokens=1, num_assistant_tokens=2, do_sample=True, num_return_sequences=2), # 'num_return_sequences' must be 1, as we cannot use different number of tokens per sequence within a group
+    dict(max_new_tokens=1, assistant_confidence_threshold=1.0, do_sample=True, num_return_sequences=2), # 'num_return_sequences' must be 1, as we cannot use different number of tokens per sequence within a group
+    dict(max_new_tokens=1, num_assistant_tokens=2, num_beams=2), # beam search is not compatible with assistant generation
+    dict(max_new_tokens=1, assistant_confidence_threshold=1.0, num_assistant_tokens=2), # 'assistant_confidence_threshold' and 'num_assistant_tokens' are mutually exclusive
+    dict(max_new_tokens=1, max_ngram_size=1), # 'max_ngram_size' is for prompt lookup, but assistant generation is turned off ('num_assistant_tokens' is 0)
+    # TODO: add tests for invalid properties
+]
+@pytest.mark.parametrize("generation_config_kwargs", invalid_configs)
+@pytest.mark.precommit
+@pytest.mark.nightly
+def test_invalid_generation_configs_throws(generation_config_kwargs):
+    config = GenerationConfig(**generation_config_kwargs)
+    with pytest.raises(RuntimeError):
+        config.validate()
+
+    config = GenerationConfig()
+    config.update_generation_config(**generation_config_kwargs)
+    with pytest.raises(RuntimeError):
+        config.validate()
+
+
+def load_genai_generation_config_from_file(configs: List[Tuple], temp_path):
+    for json_file in temp_path.glob("*.json"):
+        json_file.unlink()
+
+    for config_json, config_name in configs:
+        with (temp_path / config_name).open('w') as f:
+            json.dump(config_json, f)
+
+    ov_generation_config = GenerationConfig(temp_path / "generation_config.json")
+
+    for _, config_name in configs:
+        os.remove(temp_path / config_name)
+
+    return ov_generation_config
+
+@pytest.mark.precommit
+@pytest.mark.nightly
+def test_multiple_eos_are_read_as_stop_token_ids(tmp_path):
+    generation_config_json = {
+        "eos_token_id": [
+            2,
+            32000,
+            32007
+        ]
+    }
+    configs = [
+        (generation_config_json, "generation_config.json"),
+    ]
+
+    generation_config = load_genai_generation_config_from_file(configs, tmp_path)
+
+    assert generation_config.eos_token_id == 2
+    assert generation_config.stop_token_ids == { 2, 32000, 32007 }
diff --git a/tests/python_tests/test_kv_cache_eviction.py b/tests/python_tests/test_kv_cache_eviction.py
index bbd0da6bb2..6228f53dd1 100644
--- a/tests/python_tests/test_kv_cache_eviction.py
+++ b/tests/python_tests/test_kv_cache_eviction.py
@@ -147,7 +147,6 @@ def test_cache_optimized_generation_is_similar_to_unoptimized(converted_model, t
 
 def get_greedy_seq_len_300() -> GenerationConfig:
     generation_config = GenerationConfig()
-    generation_config.num_return_sequences = 3
     generation_config.max_new_tokens = 300
     return generation_config
 
@@ -155,6 +154,7 @@ def get_beam_search_seq_len_300() -> GenerationConfig:
     generation_config = GenerationConfig()
     generation_config.num_beam_groups = 3
     generation_config.num_beams = 6
+    generation_config.diversity_penalty = 1
     generation_config.max_new_tokens = 300
     generation_config.num_return_sequences = generation_config.num_beams
     return generation_config
diff --git a/tests/python_tests/test_llm_pipeline.py b/tests/python_tests/test_llm_pipeline.py
index 9f00996a58..6e3cce06d0 100644
--- a/tests/python_tests/test_llm_pipeline.py
+++ b/tests/python_tests/test_llm_pipeline.py
@@ -2,7 +2,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import openvino_genai as ov_genai
-from openvino_genai import StopCriteria
+from openvino_genai import StopCriteria, GenerationConfig
 import pytest
 from typing import Union, List, Dict, Optional
 import numpy as np
@@ -18,7 +18,6 @@
     get_chat_models_list,
     model_tmp_path,
     STOP_CRITERIA_MAP,
-    get_continuous_batching,
 )
 
 
@@ -299,11 +298,10 @@ def test_batch_size_switch():
 #
 
 generation_configs = [
-    dict(do_sample=False, max_new_tokens=20),
-    dict(do_sample=False, num_beam_groups=3, num_beams=15, num_return_sequences=1, max_new_tokens=10, diversity_penalty=1.0)
+    dict(max_new_tokens=20),
+    dict(max_new_tokens=10, num_beam_groups=3, num_beams=15, num_return_sequences=1, diversity_penalty=1.0)
 ]
 
-
 questions = [
     '1+1=',
     'What is the previous answer?',
@@ -311,12 +309,11 @@ def test_batch_size_switch():
     'What was my first question?'
 ]
 
-
-@pytest.mark.parametrize("generation_config", generation_configs)
+@pytest.mark.parametrize("generation_config_kwargs", generation_configs)
 @pytest.mark.parametrize("model_descr", get_chat_models_list())
 @pytest.mark.precommit
 @pytest.mark.nightly
-def test_chat_compare_with_HF(model_descr, generation_config: Dict):
+def test_chat_compare_with_HF(model_descr, generation_config_kwargs: Dict):
     chat_history_hf = []
     chat_history_ov = []
     chat_prompt = ''
@@ -324,6 +321,10 @@ def test_chat_compare_with_HF(model_descr, generation_config: Dict):
     # Will set add_special_tokens=False inside pipeline when start_chat() is called.
     model_id, path, tokenizer, opt_model, ov_pipe = read_model((model_descr[0], model_descr[1] / '_test_chat'))
 
+    from transformers import GenerationConfig as HFGenerationConfig
+    hf_generation_config = HFGenerationConfig(**generation_config_kwargs)
+    ov_generation_config = GenerationConfig(**generation_config_kwargs)
+
     ov_pipe.start_chat()
     for prompt in questions:
         chat_history_hf.append({'role': 'user', 'content': prompt})
@@ -332,11 +333,11 @@ def test_chat_compare_with_HF(model_descr, generation_config: Dict):
         chat_prompt = tokenizer.apply_chat_template(chat_history_hf, tokenize=False, add_generation_prompt=True)
         tokenized = tokenizer(chat_prompt, return_tensors='pt', add_special_tokens=False)
 
-        answer = opt_model.generate(**tokenized, **generation_config)
+        answer = opt_model.generate(**tokenized, generation_config=hf_generation_config)
         answer_str = tokenizer.decode(answer[0, tokenized['input_ids'].numel():], skip_special_tokens=True)
         chat_history_hf.append({'role': 'assistant', 'content': answer_str})
 
-        answer_ov = ov_pipe.generate(prompt, **generation_config)
+        answer_ov = ov_pipe.generate(prompt, generation_config=ov_generation_config)
         chat_history_ov.append({'role': 'assistant', 'content': answer_ov})
 
     ov_pipe.finish_chat()
@@ -492,30 +493,9 @@ def test_operator_with_streamer_kwargs_batch_throws():
         ov_pipe('', num_beams=2, streamer=printer)
 
 #
-# Tests on generation configs (invalid cases and handling within LLMPipeline)
+# Tests on generation configs handling
 #
 
-invalid_configs = [
-    dict(num_beam_groups=3, num_beams=15, do_sample=True),
-    # TODO: CVS-158682 eos_token_id is still read from tiny-random-phi3 and we cannot modify RTInfo in tests
-    # dict(do_sample=True),  # no eos_token_id no max_new_tokens, no max_len
-    dict(eos_token_id=42, ignore_eos=True),  # no max_new_tokens, no max_len with ignore_eos
-    dict(repetition_penalty=-1.0, eos_token_id=42, max_new_tokens=20), # invalid penalty
-    dict(temperature=-1.0, do_sample=True, eos_token_id=42, max_new_tokens=20), # invalid temp
-    dict(top_p=-1.0, do_sample=True, eos_token_id=42, max_new_tokens=20), # invalid top_p
-    dict(top_k=0, do_sample=True, eos_token_id=42, max_new_tokens=20), # invalid top_k
-]
-@pytest.mark.parametrize("generation_config", invalid_configs)
-@pytest.mark.precommit
-@pytest.mark.nightly
-def test_invalid_generation_configs_throws(model_tmp_path, generation_config):
-    model_id, temp_path = model_tmp_path
-    config_json = {}
-    ov_pipe = load_genai_pipe_with_configs([(config_json, "config.json")], temp_path)
-    with pytest.raises(RuntimeError):
-        ov_pipe.generate('blah blah', **generation_config)
-
-
 @pytest.mark.precommit
 @pytest.mark.nightly
 def test_eos_token_is_inherited_from_default_generation_config(model_tmp_path):
@@ -529,28 +509,14 @@ def test_eos_token_is_inherited_from_default_generation_config(model_tmp_path):
     assert 37 == ov_pipe.get_generation_config().eos_token_id
 
 
-invalid_py_configs = [
-    dict(num_beam_groups=3, num_beams=15, do_sample=True),
-    # TODO: Currently unexpected params do not cause exceptions. Need to implement it in c++ and return this test
-  #  dict(unexisting_key_name=True),  # no eos_token_id no max_new_tokens, no max_len
-    dict(eos_token_id=42, ignore_eos=True),  # no max_new_tokens, no max_len with ignore_eos
-    dict(repetition_penalty=-1.0, eos_token_id=42, max_new_tokens=20), # invalid penalty
-    dict(temperature=-1.0, do_sample=True, eos_token_id=42, max_new_tokens=20), # invalid temp
-    dict(top_p=-1.0, do_sample=True, eos_token_id=42, max_new_tokens=20), # invalid top_p
-    dict(top_k=0, do_sample=True, eos_token_id=42, max_new_tokens=20), # invalid top_k
-]
 @pytest.mark.precommit
 @pytest.mark.nightly
-@pytest.mark.parametrize("generation_config", invalid_py_configs)
-def test_python_generation_config_validation_throws(model_tmp_path, generation_config):
-    model_id, temp_path = model_tmp_path
-    ov_pipe = load_genai_pipe_with_configs([({"eos_token_id": 37}, "config.json")], temp_path)
-
-    # 'unexisting_key_name' key validity is checked in pybind and ValueError will be returned
-    #  instead of RuntimeError, which is returned when GenerationConfig values are validated
-    return_exception_type = ValueError if 'unexisting_key_name' in generation_config else RuntimeError
-    with pytest.raises(return_exception_type):
-        ov_pipe.set_generation_config(ov_genai.GenerationConfig(**generation_config))
+def test_pipeline_validates_generation_config():
+    # generate() has to reject kwargs that combine beam search with sampling
+    ov_pipe = read_model(('katuni4ka/tiny-random-phi3', Path('tiny-random-phi3')))[4]
+    beam_sample_kwargs = dict(num_beam_groups=3, num_beams=15, do_sample=True) # beam sample is not supported
+    with pytest.raises(RuntimeError):
+        ov_pipe.generate("dummy prompt", **beam_sample_kwargs)
 
 #
 # Work with Unicode in Python API
@@ -699,7 +665,7 @@ def test_stop_token_ids():
     res = ov_pipe.generate(
         ov.Tensor([(1,)]),
         max_new_tokens=3,
-        stop_token_ids={-1, 9935, ov_pipe.get_tokenizer().get_eos_token_id()},
+        stop_token_ids={9935, ov_pipe.get_tokenizer().get_eos_token_id()},
         include_stop_str_in_output=False
     )
     assert 2 == len(res.tokens[0])
diff --git a/tests/python_tests/test_tokenizer.py b/tests/python_tests/test_tokenizer.py
index 0c2a106d50..8129298763 100644
--- a/tests/python_tests/test_tokenizer.py
+++ b/tests/python_tests/test_tokenizer.py
@@ -1,6 +1,7 @@
 # Copyright (C) 2023-2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
+import os
 import pytest
 import numpy as np
 from transformers import AutoTokenizer
@@ -17,15 +18,19 @@
 
 
 def load_genai_tokenizer_with_configs(configs: List[Tuple], temp_path):
+    """Write the given (json, file name) configs into temp_path and load a Tokenizer from that directory."""
-    # load Tokenizer where all configs are cleared.
-    # remove existing jsons from previous tests
     for json_file in temp_path.glob("*.json"):
         json_file.unlink()
 
     for config_json, config_name in configs:
         with (temp_path / config_name).open('w') as f:
             json.dump(config_json, f)
-    return openvino_genai.Tokenizer(temp_path)
+    try:
+        return openvino_genai.Tokenizer(temp_path)
+    finally:
+        # remove the written configs even when Tokenizer construction raises
+        for _, config_name in configs:
+            os.remove(temp_path / config_name)
 
 
 def get_chat_templates():
@@ -181,7 +186,7 @@ def test_apply_chat_template(model_tmp_path, chat_config: Tuple[str, Dict]):
 @pytest.mark.nightly
 def test_set_chat_template():
     model_descr = get_chat_models_list()[0]
-    model_id, path, hf_tokenizer, model_opt, ov_pipe = read_model((model_descr[0], model_descr[1] / '_test_chat'))
+    model_id, path, hf_tokenizer, opt_model, ov_pipe = read_model((model_descr[0], model_descr[1] / '_test_chat'))
 
     prompt = "how are you?"
     dummy_conversation = [
@@ -265,7 +270,7 @@ def test_load_special_tokens_from_special_tokens_map_json(model_tmp_path):
 @pytest.mark.precommit
 @pytest.mark.nightly
 @pytest.mark.skip(reason="CVS-158682 - RTInfo is not modified in tests for unknown reasons")
-def test_load_special_tokens_from_tokenizer_config_json(model_tokenizers_path_tmp_path):
+def test_load_special_tokens_from_tokenizer_config_json(model_tokenizers_tmp_path):
     # special_tokens_map is not available
     # but tokenize_config.json exists
     # will load both string and integer representations
@@ -280,7 +285,7 @@ def test_load_special_tokens_from_tokenizer_config_json(model_tokenizers_path_tm
         "eos_token": "</s>",
     }
 
-    tok = load_genai_tokenizer_with_configs([(tok_config_json, "tokenizer_config.json")], model_tokenizers_path_tmp_path[1])
+    tok = load_genai_tokenizer_with_configs([(tok_config_json, "tokenizer_config.json")], model_tokenizers_tmp_path[1])
     assert tok.get_pad_token() == tok_config_json['pad_token']
     assert tok.get_bos_token() == tok_config_json['bos_token']
     assert tok.get_eos_token() == tok_config_json['eos_token']
diff --git a/tools/continuous_batching/benchmark/continuous_batching_benchmark.cpp b/tools/continuous_batching/benchmark/continuous_batching_benchmark.cpp
index 6cf462fdf8..e0c50cda02 100644
--- a/tools/continuous_batching/benchmark/continuous_batching_benchmark.cpp
+++ b/tools/continuous_batching/benchmark/continuous_batching_benchmark.cpp
@@ -123,11 +123,6 @@ Dataset filtered_dataset(const std::string& models_path, const std::string& data
         ov::genai::GenerationConfig greedy_search = ov::genai::greedy();
         greedy_search.max_new_tokens = std::min(max_output_len, output_len);
         greedy_search.ignore_eos = true;
-        greedy_search.repetition_penalty = 1.0;
-        greedy_search.frequency_penalty = 0.0;
-        greedy_search.presence_penalty = 0.0;
-        greedy_search.diversity_penalty = 0.0;
-        greedy_search.length_penalty = 0.0;
 
         dataset.push_data(human_question, greedy_search);
         dataset.push_lens(input_len, output_len);