Skip to content
Snippets Groups Projects
Commit b5266b3a authored by Yann Audin's avatar Yann Audin
Browse files

2024-06-26

parent 0437fc01
No related branches found
No related tags found
No related merge requests found
%% Cell type:code id:06d1d892-453c-475a-9b26-6e552a80427e tags:
``` python
import pandas as pd

# Location of the corpus CSV (absolute path on the author's machine).
path = r"C:\Users\audin\gitlab\anthalgo\PRODUCTION\2024_JADT\2024-06-25\corpus_fra.csv"
df = pd.read_csv(path)

# Select the "text" cell of every row whose "book" value is 6 in a single
# boolean-mask step, instead of testing each row in a DataFrame.iterrows() loop.
book_texts = df.loc[df["book"] == 6, "text"]

# One selected row is counted as one fragment.
fragments = len(book_texts)
# Words are whitespace-separated tokens, so punctuation stays attached to them.
words = sum(len(text.split()) for text in book_texts)
# Characters are the raw string length of each text (spaces included).
characters = sum(len(text) for text in book_texts)

print("Fragment : " + str(fragments))
print("Words : " + str(words))
print("Characters : " + str(characters))

# Guard both ratios: with no matching row (or no word at all) the divisions
# would otherwise raise ZeroDivisionError after the totals were printed.
if fragments:
    print("Average words per fragment : " + str(words/fragments))
else:
    print("Average words per fragment : n/a")
if words:
    print("Average character per word (including punctuation) : " + str(characters/words))
else:
    print("Average character per word (including punctuation) : n/a")
```
%% Output
Fragment : 358
Words : 20973
Characters : 122282
Average words per fragment : 58.58379888268156
Average character per word (including punctuation) : 5.830448672102227
```python
import pandas as pd

# Location of the corpus CSV (absolute path on the author's machine).
path = r"C:\Users\audin\gitlab\anthalgo\PRODUCTION\2024_JADT\2024-06-25\corpus_fra.csv"
df = pd.read_csv(path)

# Select the "text" cell of every row whose "book" value is 6 in a single
# boolean-mask step, instead of testing each row in a DataFrame.iterrows() loop.
book_texts = df.loc[df["book"] == 6, "text"]

# One selected row is counted as one fragment.
fragments = len(book_texts)
# Words are whitespace-separated tokens, so punctuation stays attached to them.
words = sum(len(text.split()) for text in book_texts)
# Characters are the raw string length of each text (spaces included).
characters = sum(len(text) for text in book_texts)

print("Fragment : " + str(fragments))
print("Words : " + str(words))
print("Characters : " + str(characters))

# Guard both ratios: with no matching row (or no word at all) the divisions
# would otherwise raise ZeroDivisionError after the totals were printed.
if fragments:
    print("Average words per fragment : " + str(words/fragments))
else:
    print("Average words per fragment : n/a")
if words:
    print("Average character per word (including punctuation) : " + str(characters/words))
else:
    print("Average character per word (including punctuation) : n/a")
```
Fragment : 358
Words : 20973
Characters : 122282
Average words per fragment : 58.58379888268156
Average character per word (including punctuation) : 5.830448672102227
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment