diff --git a/inference-spec/engines/llama.cpp.yaml b/inference-spec/engines/llama.cpp.yaml
index 2d8ec4b7..d1823a99 100644
--- a/inference-spec/engines/llama.cpp.yaml
+++ b/inference-spec/engines/llama.cpp.yaml
@@ -55,10 +55,6 @@ commands:
       - name: "--no-webui"
         description: "Disable the Web UI"
         if: "{{ args.webui == 'off' }}"
-      - name: "--flash-attn"
-        description: "Set Flash Attention use"
-        value: "on"
-        if: "{{ host.uses_nvidia or host.uses_metal }}"
       - name: "-ngl"
         description: "Number of layers to offload to the GPU if available"
         value: "{{ 999 if args.ngl < 0 else args.ngl }}"
diff --git a/ramalama/command/context.py b/ramalama/command/context.py
index e92e9a66..6ebb4c40 100644
--- a/ramalama/command/context.py
+++ b/ramalama/command/context.py
@@ -169,9 +169,10 @@ class RamalamaCommandContext:
             model = cli_args.model
         else:
             model = None
+
         host = RamalamaHostContext(
             is_container,
-            check_nvidia() is None,
+            check_nvidia() is not None,
             check_metal(argparse.Namespace(**{"container": is_container})),
             should_colorize(),
             os.getenv("RAMALAMA_LLAMACPP_RPC_NODES", None),
diff --git a/test/e2e/test_serve.py b/test/e2e/test_serve.py
index 497d7964..ce316547 100644
--- a/test/e2e/test_serve.py
+++ b/test/e2e/test_serve.py
@@ -16,7 +16,6 @@ from test.conftest import (
     skip_if_docker,
     skip_if_gh_actions_darwin,
     skip_if_no_container,
-    skip_if_not_darwin,
     skip_if_ppc64le,
     skip_if_s390x,
 )
@@ -182,10 +181,6 @@ def test_basic_dry_run():
         pytest.param(
             [], r".*--cache-reuse 256", None, None, True,
             id="check --cache-reuse default value", marks=skip_if_container
         ),
-        pytest.param(
-            [], r".*--flash-attn", None, None, True,
-            id="check --flash-attn", marks=[skip_if_container, skip_if_not_darwin]
-        ),
         pytest.param(
             ["--host", "127.0.0.1"], r".*--host 127.0.0.1", None, None, True,
diff --git a/test/system/040-serve.bats b/test/system/040-serve.bats
index cac45ced..f9256e16 100755
--- a/test/system/040-serve.bats
+++ b/test/system/040-serve.bats
@@ -73,9 +73,6 @@ verify_begin=".*run --rm"
     run_ramalama -q --dryrun serve ${model}
     assert "$output" =~ ".*--host 0.0.0.0" "Outside container sets host to 0.0.0.0"
     is "$output" ".*--cache-reuse 256" "should use cache"
-    if is_darwin; then
-        is "$output" ".*--flash-attn on" "use flash-attn on Darwin metal"
-    fi
 
     run_ramalama -q --dryrun serve --seed abcd --host 127.0.0.1 ${model}
     assert "$output" =~ ".*--host 127.0.0.1" "Outside container overrides host to 127.0.0.1"
diff --git a/test/unit/command/data/engines/llama.cpp.missing.version.yaml b/test/unit/command/data/engines/llama.cpp.missing.version.yaml
index 8d3a655e..c21eeaba 100644
--- a/test/unit/command/data/engines/llama.cpp.missing.version.yaml
+++ b/test/unit/command/data/engines/llama.cpp.missing.version.yaml
@@ -53,10 +53,6 @@ commands:
       - name: "--no-webui"
         description: "Disable the Web UI"
         if: "{{ args.webui == 'off' }}"
-      - name: "--flash-attn"
-        description: "Set Flash Attention use"
-        value: "on"
-        if: "{{ host.uses_nvidia or host.uses_metal }}"
       - name: "-ngl"
         description: "Number of layers to offload to the GPU if available"
         value: "{{ 999 if args.ngl < 0 else args.ngl }}"
diff --git a/test/unit/command/data/engines/llama.cpp.yaml b/test/unit/command/data/engines/llama.cpp.yaml
index 6c49407a..79c8d328 100644
--- a/test/unit/command/data/engines/llama.cpp.yaml
+++ b/test/unit/command/data/engines/llama.cpp.yaml
@@ -55,10 +55,6 @@ commands:
       - name: "--no-webui"
         description: "Disable the Web UI"
         if: "{{ args.webui == 'off' }}"
-      - name: "--flash-attn"
-        description: "Set Flash Attention use"
-        value: "on"
-        if: "{{ host.uses_nvidia or host.uses_metal }}"
       - name: "-ngl"
         description: "Number of layers to offload to the GPU if available"
         value: "{{ 999 if args.ngl < 0 else args.ngl }}"
diff --git a/test/unit/command/test_factory.py b/test/unit/command/test_factory.py
index e503efd7..23f01400 100644
--- a/test/unit/command/test_factory.py
+++ b/test/unit/command/test_factory.py
@@ -83,23 +83,23 @@ class FactoryInput:
     [
         (
             FactoryInput(),
-            "llama-server --host 0.0.0.0 --port 1337 --log-file /var/tmp/ramalama.log --model /path/to/model --chat-template-file /path/to/chat-template --jinja --no-warmup --reasoning-budget 0 --alias library/smollm --ctx-size 512 --temp 11 --cache-reuse 1024 -v --flash-attn on -ngl 44 --model-draft /path/to/draft-model -ngld 44 --threads 8 --seed 12345 --log-colors on --another-arg 44 --more-args",  # noqa: E501
+            "llama-server --host 0.0.0.0 --port 1337 --log-file /var/tmp/ramalama.log --model /path/to/model --chat-template-file /path/to/chat-template --jinja --no-warmup --reasoning-budget 0 --alias library/smollm --ctx-size 512 --temp 11 --cache-reuse 1024 -v -ngl 44 --model-draft /path/to/draft-model -ngld 44 --threads 8 --seed 12345 --log-colors on --another-arg 44 --more-args",  # noqa: E501
         ),
         (
             FactoryInput(has_mmproj=True),
-            "llama-server --host 0.0.0.0 --port 1337 --log-file /var/tmp/ramalama.log --model /path/to/model --mmproj /path/to/mmproj --no-warmup --reasoning-budget 0 --alias library/smollm --ctx-size 512 --temp 11 --cache-reuse 1024 -v --flash-attn on -ngl 44 --model-draft /path/to/draft-model -ngld 44 --threads 8 --seed 12345 --log-colors on --another-arg 44 --more-args",  # noqa: E501
+            "llama-server --host 0.0.0.0 --port 1337 --log-file /var/tmp/ramalama.log --model /path/to/model --mmproj /path/to/mmproj --no-warmup --reasoning-budget 0 --alias library/smollm --ctx-size 512 --temp 11 --cache-reuse 1024 -v -ngl 44 --model-draft /path/to/draft-model -ngld 44 --threads 8 --seed 12345 --log-colors on --another-arg 44 --more-args",  # noqa: E501
         ),
         (
             FactoryInput(has_chat_template=False),
-            "llama-server --host 0.0.0.0 --port 1337 --log-file /var/tmp/ramalama.log --model /path/to/model --jinja --no-warmup --reasoning-budget 0 --alias library/smollm --ctx-size 512 --temp 11 --cache-reuse 1024 -v --flash-attn on -ngl 44 --model-draft /path/to/draft-model -ngld 44 --threads 8 --seed 12345 --log-colors on --another-arg 44 --more-args",  # noqa: E501
+            "llama-server --host 0.0.0.0 --port 1337 --log-file /var/tmp/ramalama.log --model /path/to/model --jinja --no-warmup --reasoning-budget 0 --alias library/smollm --ctx-size 512 --temp 11 --cache-reuse 1024 -v -ngl 44 --model-draft /path/to/draft-model -ngld 44 --threads 8 --seed 12345 --log-colors on --another-arg 44 --more-args",  # noqa: E501
         ),
         (
             FactoryInput(cli_args=CLIArgs(runtime_args="")),
-            "llama-server --host 0.0.0.0 --port 1337 --log-file /var/tmp/ramalama.log --model /path/to/model --chat-template-file /path/to/chat-template --jinja --no-warmup --reasoning-budget 0 --alias library/smollm --ctx-size 512 --temp 11 --cache-reuse 1024 -v --flash-attn on -ngl 44 --model-draft /path/to/draft-model -ngld 44 --threads 8 --seed 12345 --log-colors on",  # noqa: E501
+            "llama-server --host 0.0.0.0 --port 1337 --log-file /var/tmp/ramalama.log --model /path/to/model --chat-template-file /path/to/chat-template --jinja --no-warmup --reasoning-budget 0 --alias library/smollm --ctx-size 512 --temp 11 --cache-reuse 1024 -v -ngl 44 --model-draft /path/to/draft-model -ngld 44 --threads 8 --seed 12345 --log-colors on",  # noqa: E501
         ),
         (
             FactoryInput(cli_args=CLIArgs(max_tokens=99, runtime_args="")),
-            "llama-server --host 0.0.0.0 --port 1337 --log-file /var/tmp/ramalama.log --model /path/to/model --chat-template-file /path/to/chat-template --jinja --no-warmup --reasoning-budget 0 --alias library/smollm --ctx-size 512 --temp 11 --cache-reuse 1024 -v --flash-attn on -ngl 44 --model-draft /path/to/draft-model -ngld 44 --threads 8 --seed 12345 --log-colors on -n 99",  # noqa: E501
+            "llama-server --host 0.0.0.0 --port 1337 --log-file /var/tmp/ramalama.log --model /path/to/model --chat-template-file /path/to/chat-template --jinja --no-warmup --reasoning-budget 0 --alias library/smollm --ctx-size 512 --temp 11 --cache-reuse 1024 -v -ngl 44 --model-draft /path/to/draft-model -ngld 44 --threads 8 --seed 12345 --log-colors on -n 99",  # noqa: E501
         ),
     ],
 )