klasocki committed on
Commit
ca2592c
1 Parent(s): 65977ce

Integrate the fine-tuned comma fixer into the app

Browse files
Dockerfile CHANGED
@@ -10,9 +10,11 @@ COPY setup.py .
10
  RUN pip install --upgrade pip
11
  RUN pip install --no-cache-dir --upgrade .
12
 
13
- COPY commafixer/src/baseline.py commafixer/src/baseline.py
 
14
  ENV TRANSFORMERS_CACHE=/coma-fixer/.cache
15
- RUN python commafixer/src/baseline.py # This pre-downloads models and tokenizers
 
16
 
17
  COPY . .
18
 
 
10
  RUN pip install --upgrade pip
11
  RUN pip install --no-cache-dir --upgrade .
12
 
13
+ # This pre-downloads models and tokenizers
14
+ COPY commafixer/src/ commafixer/src/
15
  ENV TRANSFORMERS_CACHE=/coma-fixer/.cache
16
+ RUN python commafixer/src/baseline.py
17
+ RUN python commafixer/src/fixer.py
18
 
19
  COPY . .
20
 
app.py CHANGED
@@ -4,9 +4,10 @@ from fastapi import FastAPI
4
  from fastapi.responses import FileResponse
5
  from fastapi.staticfiles import StaticFiles
6
 
7
- from commafixer.routers import baseline
8
 
9
  app = FastAPI()
 
10
  app.include_router(baseline.router, prefix='/baseline')
11
 
12
  # Without the realpath hack tests fail
 
4
  from fastapi.responses import FileResponse
5
  from fastapi.staticfiles import StaticFiles
6
 
7
+ from commafixer.routers import baseline, fixer
8
 
9
  app = FastAPI()
10
+ app.include_router(fixer.router, prefix='/fix-commas')
11
  app.include_router(baseline.router, prefix='/baseline')
12
 
13
  # Without the realpath hack tests fail
commafixer/routers/fixer.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, HTTPException
2
+ import logging
3
+
4
+ from commafixer.src.fixer import CommaFixer
5
+
6
+
7
# Obtain the logger through the logging registry. A directly constructed logging.Logger is detached from the
# root logger, so the basicConfig call below would not apply to it and INFO/DEBUG records would never be shown.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

router = APIRouter()

# The model is loaded once, when this module is imported, and stored on the router so that the endpoint
# can reuse it for every request.
logger.info('Loading the main comma fixer model...')
router.model = CommaFixer()
14
+
15
+
16
@router.post('/')
async def fix_commas(data: dict):
    """
    Fix the commas of the text sent under the 's' key of the JSON request body, using the fine-tuned model.

    :param data: The parsed JSON body of the POST request.
    :return: A dict holding the text with commas fixed, under the same 's' key.
    :raises HTTPException: With status code 400 when the 's' key is missing from the body.
    """
    json_field_name = 's'

    # Reject malformed requests first, so the happy path below reads straight through.
    if json_field_name not in data:
        error_detail = f"Text '{json_field_name}' missing"
        logger.debug(error_detail)
        raise HTTPException(status_code=400, detail=error_detail)

    logger.debug('Fixing commas.')
    fixed_text = router.model.fix_commas(data[json_field_name])
    return {json_field_name: fixed_text}
commafixer/src/fixer.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from peft import PeftConfig, PeftModel
2
+ from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline, NerPipeline, RobertaTokenizerFast
3
+ import nltk
4
+ import re
5
+
6
+
7
class CommaFixer:
    """
    A wrapper class for the fine-tuned comma fixer model.

    The model and the tokenizer are loaded once in the constructor. `fix_commas` then removes the commas from a
    string and re-inserts them after the tokens for which the model predicts the `B-COMMA` tag.
    """

    def __init__(self, device=-1):
        """
        :param device: Not read anywhere in this class at the moment; accepted so that existing callers passing it
        keep working.
        """
        self.id2label = {0: 'O', 1: 'B-COMMA'}
        self.label2id = {'O': 0, 'B-COMMA': 1}
        self.model, self.tokenizer = self._load_peft_model()

    def fix_commas(self, s: str) -> str:
        """
        The main method for fixing commas using the fine-tuned model.
        In the future we should think about batching the calls to it, for now it processes requests string by string.
        :param s: A string with commas to fix, without length restrictions.
        However, if the string is longer than the tokenizer's length limit, it is processed in pieces and some
        whitespaces between sentences might be trimmed.
        Example: comma_fixer.fix_commas("One two three, and four!")
        :return: A string with commas fixed, example: "One, two, three and four!"
        """
        # Imported locally so the file's import block does not have to change; torch is already required by
        # `return_tensors='pt'` below.
        import torch

        # Drop every existing comma together with the whitespace in front of it; the model decides where they go.
        s_no_commas = re.sub(r'\s*,', '', s)
        tokenized = self.tokenizer(s_no_commas, return_tensors='pt', return_offsets_mapping=True, return_length=True)

        # If text too long, split it and fix commas in the pieces separately.
        # TODO this is slow, we should think about joining them until length, or maybe a length limit to avoid
        # stalling the whole service
        if tokenized['length'][0] > self.tokenizer.model_max_length:
            return self._fix_commas_in_pieces(s)

        # Inference only: no gradients are needed, so do not let autograd record the forward pass.
        with torch.no_grad():
            logits = self.model(input_ids=tokenized['input_ids'], attention_mask=tokenized['attention_mask']).logits
        labels = [self.id2label[tag_id.item()] for tag_id in logits.argmax(dim=2).flatten()]
        return _fix_commas_based_on_labels_and_offsets(labels, s_no_commas, tokenized['offset_mapping'][0])

    def _fix_commas_in_pieces(self, s: str) -> str:
        """
        Fixes commas of a text that is too long for the model by splitting it into smaller pieces.
        :param s: The original string (commas still inside), which exceeded the tokenizer's length limit.
        :return: The fixed pieces joined with single spaces, or `s` unchanged if it cannot be split at all.
        """
        sentences = nltk.sent_tokenize(s)
        if len(sentences) > 1:
            return ' '.join(self.fix_commas(sentence) for sentence in sentences)

        # A single over-long sentence: recursing on it again would never terminate, so halve it on spaces instead.
        # Splitting and joining on the same single space keeps the spacing inside the sentence intact.
        words = s.split(' ')
        if len(words) < 2:
            # Nothing left to split on; return the input as it is rather than recursing forever.
            return s
        middle = len(words) // 2
        return ' '.join(self.fix_commas(' '.join(half)) for half in (words[:middle], words[middle:]))

    def _load_peft_model(self, model_name="klasocki/roberta-large-lora-ner-comma-fixer") -> tuple[
            PeftModel, RobertaTokenizerFast]:
        """
        Creates the huggingface model and tokenizer.
        Can also be used for pre-downloading the model and the tokenizer.
        :param model_name: Name of the model on the huggingface hub.
        :return: A model with the peft adapter injected and weights merged, and the tokenizer.
        """
        config = PeftConfig.from_pretrained(model_name)
        inference_model = AutoModelForTokenClassification.from_pretrained(
            config.base_model_name_or_path, num_labels=len(self.id2label), id2label=self.id2label,
            label2id=self.label2id
        )
        tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
        model = PeftModel.from_pretrained(inference_model, model_name)
        model = model.merge_and_unload()  # Join LoRa matrices with the main model for faster inference
        return model, tokenizer
56
+
57
+
58
+ def _fix_commas_based_on_labels_and_offsets(
59
+ labels: list[str],
60
+ original_s: str,
61
+ offset_map: list[tuple[int, int]]
62
+ ) -> str:
63
+ """
64
+ This function returns the original string with only commas fixed, based on the predicted labels from the main
65
+ model and the offsets from the tokenizer.
66
+ :param labels: Predicted labels for the tokens.
67
+ Should already be converted to string, since we will look for B-COMMA tags.
68
+ :param original_s: The original string, used to preserve original spacing and punctuation.
69
+ :param offset_map: List of offsets in the original string, we will only use the second integer of each pair
70
+ indicating where the token ended originally in the string.
71
+ :return: The string with commas fixed, and everything else intact.
72
+ """
73
+ result = original_s
74
+ commas_inserted = 0
75
+
76
+ for i, label in enumerate(labels):
77
+ current_offset = offset_map[i][1] + commas_inserted
78
+ if _should_insert_comma(label, result, current_offset):
79
+ result = result[:current_offset] + ',' + result[current_offset:]
80
+ commas_inserted += 1
81
+ return result
82
+
83
+
84
+ def _should_insert_comma(label, result, current_offset) -> bool:
85
+ # Only insert commas for the final token of a word, that is, if next word starts with a space.
86
+ return label == 'B-COMMA' and result[current_offset].isspace()
87
+
88
+
89
+ if __name__ == "__main__":
90
+ CommaFixer() # to pre-download the model and tokenizer
openapi.yaml CHANGED
@@ -6,6 +6,40 @@ info:
6
  servers:
7
  - url: 'https://localhost:5000'
8
  paths:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  /baseline/fix-commas:
10
  post:
11
  summary: Fixes comma placement in a sentence using the baseline model
 
6
  servers:
7
  - url: 'https://localhost:5000'
8
  paths:
9
+ /fix-commas:
10
+ post:
11
+ summary: Fixes comma placement in a sentence using the fine-tuned model
12
+ requestBody:
13
+ required: true
14
+ content:
15
+ application/json:
16
+ schema:
17
+ type: object
18
+ properties:
19
+ s:
20
+ type: string
21
+ example: 'This, is a sentence with wrong commas at least some.'
22
+ description: The text with commas to fix. Commas can be removed, added, reordered at will, or left
23
+ unchanged. Other punctuation, whitespaces and so on will stay intact.
24
+ responses:
25
+ 200:
26
+ description: Commas fixed.
27
+ content:
28
+ application/json:
29
+ schema:
30
+ type: object
31
+ properties:
32
+ s:
33
+ type: string
34
+ example: 'This is a sentence with wrong commas, at least some.'
35
+ description: A text with commas fixed, or unchanged if not necessary. Everything other that
36
+ commas will stay as it was originally.
37
+
38
+ 400:
39
+ description: A required field missing from the POST request body JSON.
40
+
41
+ # TODO remove duplication here
42
+
43
  /baseline/fix-commas:
44
  post:
45
  summary: Fixes comma placement in a sentence using the baseline model
setup.py CHANGED
@@ -11,6 +11,8 @@ setup(
11
  install_requires=[
12
  "fastapi == 0.101.1",
13
  "uvicorn == 0.23.2",
 
 
14
  "torch == 2.0.1",
15
  "transformers == 4.31.0",
16
  # for the tokenizer of the baseline model
@@ -22,7 +24,6 @@ setup(
22
  'training': [
23
  'datasets==2.14.4',
24
  'notebook',
25
- 'peft==0.5.0',
26
  'seqeval',
27
  'evaluate==0.4.0'
28
  ],
 
11
  install_requires=[
12
  "fastapi == 0.101.1",
13
  "uvicorn == 0.23.2",
14
+ "nltk == 3.8.1",
15
+ 'peft==0.5.0',
16
  "torch == 2.0.1",
17
  "transformers == 4.31.0",
18
  # for the tokenizer of the baseline model
 
24
  'training': [
25
  'datasets==2.14.4',
26
  'notebook',
 
27
  'seqeval',
28
  'evaluate==0.4.0'
29
  ],
static/index.html CHANGED
@@ -1,36 +1,45 @@
1
  <!DOCTYPE html>
2
  <html lang="en">
3
- <head>
4
- <meta charset="UTF-8" />
5
- <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
  <title>Fast API 🤗 Space served with Uvicorn</title>
7
- <link rel="stylesheet" href="style.css" />
8
  <script type="module" src="script.js"></script>
9
- </head>
10
- <body>
11
- <main>
12
- <section id="comma-fixing">
13
  <h2>Fixing commas using Transformers</h2>
14
  <p>
15
- Model:
16
- <a
17
- href="https://huggingface.co/oliverguhr/fullstop-punctuation-multilang-large"
18
- rel="noreferrer"
19
- target="_blank"
 
 
 
 
 
 
 
 
 
20
  >oliverguhr/fullstop-punctuation-multilang-large
21
- </a>
22
  </p>
23
  <form class="comma-fixing-form">
24
- <label for="comma-fixing-input">Text with incorrect commas</label>
25
- <input
26
- id="comma-fixing-input"
27
- type="text"
28
- value="This is however a very bad, and terrible sentence grammatically that is."
29
- />
30
- <button id="comma-fixing-submit">Submit</button>
31
- <p class="comma-fixing-output"></p>
32
  </form>
33
- </section>
34
- </main>
35
- </body>
36
  </html>
 
1
  <!DOCTYPE html>
2
  <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8"/>
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0"/>
6
  <title>Fast API 🤗 Space served with Uvicorn</title>
7
+ <link rel="stylesheet" href="style.css"/>
8
  <script type="module" src="script.js"></script>
9
+ </head>
10
+ <body>
11
+ <main>
12
+ <section id="comma-fixing">
13
  <h2>Fixing commas using Transformers</h2>
14
  <p>
15
+ Fine-tuned model:
16
+ <a
17
+ href="https://huggingface.co/klasocki/roberta-large-lora-ner-comma-fixer"
18
+ rel="noreferrer"
19
+ target="_blank"
20
+ >klasocki/roberta-large-lora-ner-comma-fixer
21
+ </a>
22
+ </p>
23
+ <p>
24
+ Baseline model:
25
+ <a
26
+ href="https://huggingface.co/oliverguhr/fullstop-punctuation-multilang-large"
27
+ rel="noreferrer"
28
+ target="_blank"
29
  >oliverguhr/fullstop-punctuation-multilang-large
30
+ </a>
31
  </p>
32
  <form class="comma-fixing-form">
33
+ <label for="comma-fixing-input">Text with incorrect commas</label>
34
+ <input
35
+ id="comma-fixing-input"
36
+ type="text"
37
+ value="This is however a very bad, and terrible sentence grammatically that is."
38
+ />
39
+ <button id="comma-fixing-submit">Submit</button>
40
+ <p class="comma-fixing-output"></p>
41
  </form>
42
+ </section>
43
+ </main>
44
+ </body>
45
  </html>
static/script.js CHANGED
@@ -1,7 +1,7 @@
1
  const commaFixingForm = document.querySelector(".comma-fixing-form");
2
 
3
  const fixCommas = async (text) => {
4
- const inferResponse = await fetch(`baseline/fix-commas/`, {
5
  method: "POST",
6
  body: JSON.stringify({
7
  s: text
@@ -9,10 +9,13 @@ const fixCommas = async (text) => {
9
  headers: {
10
  "Content-type": "application/json; charset=UTF-8"
11
  }
12
- });
13
- const inferJson = await inferResponse.json();
 
 
 
14
 
15
- return inferJson.s;
16
  };
17
 
18
  commaFixingForm.addEventListener("submit", async (event) => {
@@ -21,5 +24,7 @@ commaFixingForm.addEventListener("submit", async (event) => {
21
  const commaFixingInput = document.getElementById("comma-fixing-input");
22
  const commaFixingParagraph = document.querySelector(".comma-fixing-output");
23
 
24
- commaFixingParagraph.textContent = await fixCommas(commaFixingInput.value);
 
 
25
  });
 
1
  const commaFixingForm = document.querySelector(".comma-fixing-form");
2
 
3
  const fixCommas = async (text) => {
4
+ let request = {
5
  method: "POST",
6
  body: JSON.stringify({
7
  s: text
 
9
  headers: {
10
  "Content-type": "application/json; charset=UTF-8"
11
  }
12
+ };
13
+ const baselineResponse = await fetch(`baseline/fix-commas/`, request);
14
+ const fixerResponse = await fetch(`fix-commas/`, request);
15
+ const baselineJson = await baselineResponse.json();
16
+ const inferJson = await fixerResponse.json();
17
 
18
+ return {baseline: baselineJson.s, main: inferJson.s};
19
  };
20
 
21
  commaFixingForm.addEventListener("submit", async (event) => {
 
24
  const commaFixingInput = document.getElementById("comma-fixing-input");
25
  const commaFixingParagraph = document.querySelector(".comma-fixing-output");
26
 
27
+ const fixed = await fixCommas(commaFixingInput.value);
28
+
29
+ commaFixingParagraph.textContent = `Our model: ${fixed.main}\n\nBaseline model: ${fixed.baseline}`
30
  });
tests/{test_baseline.py → test_comma_fixers.py} RENAMED
@@ -1,12 +1,19 @@
1
  import pytest
2
  from commafixer.src.baseline import BaselineCommaFixer, _remove_punctuation
 
3
 
4
 
 
5
  @pytest.fixture()
6
  def baseline_fixer():
7
  yield BaselineCommaFixer()
8
 
9
 
 
 
 
 
 
10
  @pytest.mark.parametrize(
11
  "test_input",
12
  ['',
@@ -14,9 +21,40 @@ def baseline_fixer():
14
  'This test string should not have any commas inside it.',
15
  'aAaalLL the.. weird?~! punctuation.should also . be kept-as is! Only fixing-commas.']
16
  )
17
- def test_fix_commas_leaves_correct_strings_unchanged(baseline_fixer, test_input):
18
- result = baseline_fixer.fix_commas(s=test_input)
19
- assert result == test_input
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
 
22
  @pytest.mark.parametrize(
@@ -35,12 +73,13 @@ def test_fix_commas_leaves_correct_strings_unchanged(baseline_fixer, test_input)
35
  'nonetheless or we will fail this test.',
36
  ' This is a sentence. With a lot of useless punctuation!!??. O.o However, we have to insert commas O-O '
37
  'nonetheless, or we will fail this test.'],
38
- [" The ship 's secondary armament consisted of fourteen 45 @-@ calibre 6 @-@ inch ( 152 mm ) quick @-@ firing ( QF ) guns mounted in casemates . Lighter guns consisted of eight 47 @-@ millimetre ( 1 @.@ 9 in ) three @-@ pounder Hotchkiss guns and four 47 @-@ millimetre 2 @.@ 5 @-@ pounder Hotchkiss guns . The ship was also equipped with four submerged 18 @-@ inch torpedo tubes two on each broadside .",
39
- " The ship 's secondary armament consisted of fourteen 45 @-@ calibre 6 @-@ inch ( 152 mm ) quick @-@ firing ( QF ) guns mounted in casemates . Lighter guns consisted of eight 47 @-@ millimetre ( 1 @.@ 9 in ), three @-@ pounder Hotchkiss guns and four 47 @-@ millimetre 2 @.@ 5 @-@ pounder Hotchkiss guns . The ship was also equipped with four submerged 18 @-@ inch torpedo tubes, two on each broadside ."]
 
40
 
41
  ]
42
  )
43
- def test_fix_commas_fixes_incorrect_commas(baseline_fixer, test_input, expected):
44
  result = baseline_fixer.fix_commas(s=test_input)
45
  assert result == expected
46
 
 
1
  import pytest
2
  from commafixer.src.baseline import BaselineCommaFixer, _remove_punctuation
3
+ from commafixer.src.fixer import CommaFixer
4
 
5
 
6
+ # TODO look up best practices and duplication for tests like these
7
  @pytest.fixture()
8
  def baseline_fixer():
9
  yield BaselineCommaFixer()
10
 
11
 
12
+ @pytest.fixture()
13
+ def comma_fixer():
14
+ yield CommaFixer()
15
+
16
+
17
  @pytest.mark.parametrize(
18
  "test_input",
19
  ['',
 
21
  'This test string should not have any commas inside it.',
22
  'aAaalLL the.. weird?~! punctuation.should also . be kept-as is! Only fixing-commas.']
23
  )
24
+ class TestCorrectStringsAreUnchanged:
25
+ def test_model_fix_commas_leaves_correct_strings_unchanged(self, comma_fixer, test_input):
26
+ result = comma_fixer.fix_commas(s=test_input)
27
+ assert result == test_input
28
+
29
+ def test_baseline_fix_commas_leaves_correct_strings_unchanged(self, baseline_fixer, test_input):
30
+ result = baseline_fixer.fix_commas(s=test_input)
31
+ assert result == test_input
32
+
33
+
34
+ @pytest.mark.parametrize(
35
+ "test_input, expected",
36
+ [
37
+ ['I, am.', 'I am.'],
38
+ ['A complex clause however it misses a comma something else and a dot...?',
39
+ 'A complex clause, however, it misses a comma, something else and a dot...?'],
40
+ ['a pen an apple, \tand a pineapple!',
41
+ 'a pen, an apple \tand a pineapple!'],
42
+ ['Even newlines\ntabs\tand others get preserved.',
43
+ 'Even newlines,\ntabs\tand others get preserved.'],
44
+ ['I had no Creativity left, therefore, I come here, and write useless examples, for this test.',
45
+ 'I had no Creativity left, therefore I come here and write useless examples for this test.'],
46
+ [' This is a sentence. With, a lot of, useless punctuation!!??. O.o However we have to insert commas O-O, '
47
+ 'nonetheless or we will fail this test.',
48
+ ' This is a sentence. With a lot of useless punctuation!!??. O.o However, we have to insert commas O-O '
49
+ 'nonetheless, or we will fail this test.'],
50
+ [
51
+ " The ship 's secondary armament consisted of fourteen 45 @-@ calibre 6 @-@ inch ( 152 mm ) quick @-@ firing ( QF ) guns mounted in casemates . Lighter guns consisted of eight 47 @-@ millimetre ( 1 @.@ 9 in ) three @-@ pounder Hotchkiss guns and four 47 @-@ millimetre 2 @.@ 5 @-@ pounder Hotchkiss guns . The ship was also equipped with four submerged 18 @-@ inch torpedo tubes two on each broadside .",
52
+ " The ship 's secondary armament consisted of fourteen 45 @-@ calibre 6 @-@ inch ( 152 mm ) quick @-@ firing ( QF ) guns mounted in casemates . Lighter guns consisted of eight 47 @-@ millimetre ( 1 @.@ 9 in ) three @-@ pounder Hotchkiss guns and four 47 @-@ millimetre 2 @.@ 5 @-@ pounder Hotchkiss guns . The ship was also equipped with four submerged 18 @-@ inch torpedo tubes, two on each broadside ."]
53
+ ]
54
+ )
55
+ def test_main_model_fix_commas_fixes_correct_commas(comma_fixer, test_input, expected):
56
+ result = comma_fixer.fix_commas(s=test_input)
57
+ assert result == expected
58
 
59
 
60
  @pytest.mark.parametrize(
 
73
  'nonetheless or we will fail this test.',
74
  ' This is a sentence. With a lot of useless punctuation!!??. O.o However, we have to insert commas O-O '
75
  'nonetheless, or we will fail this test.'],
76
+ [
77
+ " The ship 's secondary armament consisted of fourteen 45 @-@ calibre 6 @-@ inch ( 152 mm ) quick @-@ firing ( QF ) guns mounted in casemates . Lighter guns consisted of eight 47 @-@ millimetre ( 1 @.@ 9 in ) three @-@ pounder Hotchkiss guns and four 47 @-@ millimetre 2 @.@ 5 @-@ pounder Hotchkiss guns . The ship was also equipped with four submerged 18 @-@ inch torpedo tubes two on each broadside .",
78
+ " The ship 's secondary armament consisted of fourteen 45 @-@ calibre 6 @-@ inch ( 152 mm ) quick @-@ firing ( QF ) guns mounted in casemates . Lighter guns consisted of eight 47 @-@ millimetre ( 1 @.@ 9 in ), three @-@ pounder Hotchkiss guns and four 47 @-@ millimetre 2 @.@ 5 @-@ pounder Hotchkiss guns . The ship was also equipped with four submerged 18 @-@ inch torpedo tubes, two on each broadside ."]
79
 
80
  ]
81
  )
82
+ def test_baseline_fix_commas_fixes_incorrect_commas(baseline_fixer, test_input, expected):
83
  result = baseline_fixer.fix_commas(s=test_input)
84
  assert result == expected
85
 
tests/test_integration.py CHANGED
@@ -4,50 +4,51 @@ import pytest
4
  from app import app
5
 
6
 
7
- @pytest.fixture()
8
- def client():
9
- yield TestClient(app)
10
-
11
-
12
- def test_fix_commas_fails_on_no_parameter(client):
13
- response = client.post('/baseline/fix-commas/')
14
- assert response.status_code == 422
15
-
16
-
17
- def test_fix_commas_fails_on_wrong_parameters(client):
18
- response = client.post('/baseline/fix-commas/', json={'text': "Some text."})
19
- assert response.status_code == 400
20
-
21
-
22
  @pytest.mark.parametrize(
23
- "test_input",
24
- ['',
25
- 'Hello world.',
26
- 'This test string should not have any commas inside it.']
27
  )
28
- def test_fix_commas_correct_string_unchanged(client, test_input: str):
29
- response = client.post('/baseline/fix-commas/', json={'s': test_input})
30
-
31
- assert response.status_code == 200
32
- assert response.json().get('s') == test_input
33
-
34
-
35
- @pytest.mark.parametrize(
36
- "test_input, expected",
37
- [['I am, here.', 'I am here.'],
38
- ['books pens and pencils',
39
- 'books, pens and pencils']]
40
- )
41
- def test_fix_commas_fixes_wrong_commas(client, test_input: str, expected: str):
42
- response = client.post('/baseline/fix-commas/', json={'s': test_input})
43
-
44
- assert response.status_code == 200
45
- assert response.json().get('s') == expected
46
-
47
-
48
- def test_with_a_very_long_string(client):
49
- s = "Just a long string. " * 1000
50
- response = client.post('/baseline/fix-commas/', json={'s': s})
51
-
52
- assert response.status_code == 200
53
- assert response.json().get('s') == s
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  from app import app
5
 
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  @pytest.mark.parametrize(
8
+ "endpoint",
9
+ ['/fix-commas/',
10
+ '/baseline/fix-commas/']
 
11
  )
12
+ class TestFixCommaApi:
13
+ @pytest.fixture()
14
+ def client(self):
15
+ yield TestClient(app)
16
+
17
+ def test_fix_commas_fails_on_no_parameter(self, client, endpoint):
18
+ response = client.post(endpoint)
19
+ assert response.status_code == 422
20
+
21
+ def test_fix_commas_fails_on_wrong_parameters(self, client, endpoint):
22
+ response = client.post(endpoint, json={'text': "Some text."})
23
+ assert response.status_code == 400
24
+
25
+ @pytest.mark.parametrize(
26
+ "test_input",
27
+ ['',
28
+ 'Hello world.',
29
+ 'This test string should not have any commas inside it.']
30
+ )
31
+ def test_fix_commas_correct_string_unchanged(self, client, endpoint, test_input: str):
32
+ response = client.post(endpoint, json={'s': test_input})
33
+
34
+ assert response.status_code == 200
35
+ assert response.json().get('s') == test_input
36
+
37
+ @pytest.mark.parametrize(
38
+ "test_input, expected",
39
+ [['I am, here.', 'I am here.'],
40
+ ['books pens and pencils',
41
+ 'books, pens and pencils']]
42
+ )
43
+ def test_fix_commas_fixes_wrong_commas(self, client, endpoint, test_input: str, expected: str):
44
+ response = client.post(endpoint, json={'s': test_input})
45
+
46
+ assert response.status_code == 200
47
+ assert response.json().get('s') == expected
48
+
49
+ def test_with_a_very_long_string(self, endpoint, client):
50
+ s = ("Just a long string. " * 200).rstrip()
51
+ response = client.post(endpoint, json={'s': s})
52
+
53
+ assert response.status_code == 200
54
+ assert response.json().get('s') == s