aboutsummaryrefslogtreecommitdiff
path: root/tests
diff options
context:
space:
mode:
authorflu0r1ne <flu0r1ne@flu0r1ne.net>2023-05-06 05:42:39 -0500
committerflu0r1ne <flu0r1ne@flu0r1ne.net>2023-05-06 05:42:52 -0500
commit603ebf9a866314b3304f800d50c09a3cd55d8546 (patch)
tree8409896f90999eb556c0e2d46cb9c0f7e607d43a /tests
parent537d08fd952a88a799eff4002d8e6f1d2c224258 (diff)
downloadgpt-chat-cli-603ebf9a866314b3304f800d50c09a3cd55d8546.tar.xz
gpt-chat-cli-603ebf9a866314b3304f800d50c09a3cd55d8546.zip
Add automatic code highlighting
Diffstat (limited to 'tests')
-rw-r--r--tests/test_streaming_lexer.py207
1 files changed, 207 insertions, 0 deletions
diff --git a/tests/test_streaming_lexer.py b/tests/test_streaming_lexer.py
new file mode 100644
index 0000000..cd03513
--- /dev/null
+++ b/tests/test_streaming_lexer.py
@@ -0,0 +1,207 @@
+import re
+from typing import Optional, Tuple
+from enum import Enum, auto
+from dataclasses import dataclass
+
+import pytest
+
+from src.gpt_chat_cli.streaming_lexer import (
+ MatchState,
+ CodeFenceContext,
+ _try_to_parse_code_fence,
+ SinglePassStreamingLexer,
+ Token,
+ TokenType,
+ TokenOrientation,
+ make_text_token
+)
+
+def test_try_to_parse_code_fence():
+    """Unit-test _try_to_parse_code_fence on matched, mismatched and
+    incomplete fence openings.
+
+    The helper returns a (MatchState, CodeFenceContext-or-None) pair;
+    judging by the expected values, CodeFenceContext carries
+    (indent, info string, consumed offset) -- e.g. offset 10 ==
+    len("```python\n") -- TODO confirm field order in the implementation.
+    """
+    # Test valid cases: ``` or ~~~ fences with a small leading indent,
+    # an info string, and a terminating newline.
+    valid_cases = [
+        ("```python\nhe", CodeFenceContext(0, "python", 10)),
+        (" ```python\n", CodeFenceContext(2, "python", 12)),
+        ("~~~python\n", CodeFenceContext(0, "python", 10)),
+        (" ~~~python\nmore", CodeFenceContext(3, "python", 13))
+    ]
+
+
+    for case, expected in valid_cases:
+        result = _try_to_parse_code_fence(case)
+        assert result[0] == MatchState.MATCH
+        assert result[1] == expected
+
+    # Test invalid cases: over-indented fence, mixed fence characters,
+    # and trailing fence markers on the opening line.
+    invalid_cases = [
+        " ```python\n",
+        "~``python\n",
+        "```python ```\n",
+        "~~~python ~~~\n",
+    ]
+
+    for case in invalid_cases:
+        print(case)  # NOTE(review): debug aid; identifies the failing case on assert
+        result = _try_to_parse_code_fence(case)
+        assert result[0] == MatchState.MISMATCH
+
+    # Test indeterminate case: a bare fence prefix with no newline yet --
+    # the parser cannot decide until more input arrives.
+    indeterminate_cases = [
+        "```",
+        " ~~~",
+    ]
+
+    for case in indeterminate_cases:
+        result = _try_to_parse_code_fence(case)
+        assert result[0] == MatchState.INDETERMINATE
+
+def _check_exact_lexing_matches( chunk_tokens, final_tokens ):
+    """Drive a SinglePassStreamingLexer chunk-by-chunk and assert it
+    emits exactly the expected tokens at each step.
+
+    :param chunk_tokens: iterable of (chunk, expected_tokens) pairs; each
+        chunk is fed via add_chunk(), after which parse() must yield
+        exactly expected_tokens (same order, same count).
+    :param final_tokens: tokens parse() must yield after finish() is
+        called (e.g. implicit fence termination and EOF).
+    """
+
+    lexer = SinglePassStreamingLexer()
+
+    for ( chunk, expected_tokens ) in chunk_tokens:
+
+        lexer.add_chunk( chunk )
+
+        n_tokens_emitted = 0
+
+        # Compare token-by-token; the index bound catches the lexer
+        # emitting MORE tokens than expected...
+        for i, token in enumerate(lexer.parse()):
+            assert i < len(expected_tokens)
+            assert expected_tokens[i] == token
+
+            n_tokens_emitted += 1
+
+        # ...and the count check catches it emitting FEWER.
+        assert n_tokens_emitted == len(expected_tokens)
+
+    # Signal end-of-input so any buffered/indeterminate state is flushed.
+    lexer.finish()
+
+    n_tokens_emitted = 0
+
+    for i, token in enumerate(lexer.parse()):
+        assert i < len(final_tokens)
+        print(token)  # NOTE(review): leftover debug print -- consider removing
+        assert final_tokens[i] == token
+
+        n_tokens_emitted += 1
+
+    assert n_tokens_emitted == len(final_tokens)
+
+
+def test_single_pass_lexing():
+    """Scenario tests for SinglePassStreamingLexer.
+
+    Each `cases` list feeds chunks in order and states the exact tokens
+    expected after every chunk; `final_tokens` are those expected once
+    finish() is called.  Covers: a basic fenced block, fences split
+    across chunks, indented fences, backticks preceded by text, and an
+    unterminated code block.
+    """
+
+    # Basic flow: text lines, a python fence, code text, then a closing
+    # fence whose newline never arrives -- its END token (and EOF) are
+    # deferred until finish().
+    cases = [
+        ( 'Some text\n', [
+            make_text_token( 'Some text\n' )
+        ] ),
+        ( 'More text\n', [
+            make_text_token( 'More text\n' )
+        ] ),
+        ( ' Indented text\n', [
+            make_text_token( ' Indented text\n' )
+        ] ),
+        ( '```python\n', [
+            Token( TokenType.CODE_FENCE, TokenOrientation.BEGIN, 'python' )
+        ] ),
+        ( 'print("Hello")\n', [
+            make_text_token( 'print("Hello")\n' )
+        ] ),
+        ( '```', [] ),
+    ]
+
+    final_tokens = [
+        Token( TokenType.CODE_FENCE, TokenOrientation.END ),
+        Token( TokenType.EOF, TokenOrientation.NONE ),
+    ]
+
+    _check_exact_lexing_matches( cases, final_tokens )
+
+    # A chunk may carry a fence plus code; the partial line 'More ' is
+    # emitted as-is and completed by the next chunk.
+    cases = [
+        ( '```java\nSome text\nMore ', [
+            Token( TokenType.CODE_FENCE, TokenOrientation.BEGIN, 'java' ),
+            make_text_token( 'Some text\n' ),
+            make_text_token( 'More ' ),
+        ] ),
+        ( ' text\n```', [
+            make_text_token( ' text\n' ),
+        ] ),
+        ( '\n', [
+            Token( TokenType.CODE_FENCE, TokenOrientation.END )
+        ]),
+    ]
+
+    final_tokens = [
+        Token( TokenType.EOF, TokenOrientation.NONE ),
+    ]
+
+    _check_exact_lexing_matches( cases, final_tokens )
+
+    # Indented opening/closing fences still delimit a code block.
+    cases = [
+        ( ' ```java \n Some text\n More ', [
+            Token( TokenType.CODE_FENCE, TokenOrientation.BEGIN, 'java' ),
+            make_text_token( 'Some text\n' ),
+            make_text_token( 'More ' ),
+        ] ),
+        ( ' text\n ```', [
+            make_text_token( ' text\n' ),
+        ] ),
+        ( '\n', [
+            Token( TokenType.CODE_FENCE, TokenOrientation.END )
+        ]),
+    ]
+
+    final_tokens = [
+        Token( TokenType.EOF, TokenOrientation.NONE ),
+    ]
+
+    _check_exact_lexing_matches( cases, final_tokens )
+
+    # An opening fence split across a chunk boundary ( ' ``' + '` java' )
+    # must be recognized once enough input arrives; the first chunk is
+    # indeterminate and emits nothing.
+    cases = [
+        ( ' ``', []),
+        ('` java \n Some text\n More ', [
+            Token( TokenType.CODE_FENCE, TokenOrientation.BEGIN, 'java' ),
+            make_text_token( 'Some text\n' ),
+            make_text_token( 'More ' ),
+        ] ),
+        ( ' text\n ```', [
+            make_text_token( ' text\n' ),
+        ] ),
+        ( '\n', [
+            Token( TokenType.CODE_FENCE, TokenOrientation.END )
+        ]),
+    ]
+
+    final_tokens = [
+        Token( TokenType.EOF, TokenOrientation.NONE ),
+    ]
+
+    _check_exact_lexing_matches( cases, final_tokens )
+
+    # Ticks preceded by characters don't initiate a code block --
+    # the whole line is passed through as text.
+    cases = [
+        ( 'tick```java\nSome text\n', [
+            make_text_token( 'tick```java\n' ),
+            make_text_token( 'Some text\n' ),
+        ] ),
+    ]
+
+    final_tokens = [
+        Token( TokenType.EOF, TokenOrientation.NONE ),
+    ]
+
+    _check_exact_lexing_matches( cases, final_tokens )
+
+    # Code blocks which are not terminated, terminate
+    # at the end of the document (finish() emits the implicit END).
+    cases = [
+        ( '```java\nSome text\n', [
+            Token( TokenType.CODE_FENCE, TokenOrientation.BEGIN, 'java' ),
+            make_text_token( 'Some text\n' ),
+        ] ),
+    ]
+
+    final_tokens = [
+        Token( TokenType.CODE_FENCE, TokenOrientation.END ),
+        Token( TokenType.EOF, TokenOrientation.NONE ),
+    ]
+
+    _check_exact_lexing_matches( cases, final_tokens )
+