Elron committed on
Commit
719f1b0
1 Parent(s): 18de90a

Upload text_utils.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. text_utils.py +20 -25
text_utils.py CHANGED
@@ -1,9 +1,10 @@
 
1
  import re
 
2
 
3
 
4
  def split_words(s):
5
- """
6
- Splits a string into words based on PascalCase, camelCase, snake_case, kebab-case, and numbers attached to strings.
7
 
8
  Args:
9
  s (str): The string to be split.
@@ -19,13 +20,11 @@ def split_words(s):
19
  s = re.sub(r"([a-zA-Z])(\d)", r"\1 \2", s)
20
  s = re.sub(r"(\d)([a-zA-Z])", r"\1 \2", s)
21
  # Split the string into words based on spaces
22
- words = s.split()
23
- return words
24
 
25
 
26
  def is_camel_case(s):
27
- """
28
- Checks if a string is in camelCase.
29
 
30
  Args:
31
  s (str): The string to be checked.
@@ -37,8 +36,7 @@ def is_camel_case(s):
37
 
38
 
39
  def is_snake_case(s):
40
- """
41
- Checks if a string is in snake_case.
42
 
43
  Args:
44
  s (str): The string to be checked.
@@ -50,8 +48,7 @@ def is_snake_case(s):
50
 
51
 
52
  def camel_to_snake_case(s):
53
- """
54
- Converts a string from camelCase to snake_case.
55
 
56
  Args:
57
  s (str): The string to be converted.
@@ -65,16 +62,11 @@ def camel_to_snake_case(s):
65
  # Ensure there's an underscore before any uppercase letter that's followed by a lowercase letter or digit and comes after a sequence of uppercase letters
66
  s = re.sub(r"([A-Z]+)([A-Z][a-z0-9])", r"\1_\2", s)
67
 
68
- s = s.lower()
69
- return s
70
-
71
-
72
- import shutil
73
 
74
 
75
  def print_dict(d, indent=0, indent_delta=4, max_chars=None):
76
- """
77
- Prints a dictionary in a formatted manner, taking into account the terminal width.
78
 
79
  Args:
80
  d (dict): The dictionary to be printed.
@@ -82,13 +74,15 @@ def print_dict(d, indent=0, indent_delta=4, max_chars=None):
82
  indent_delta (int, optional): The amount of spaces to add for each level of indentation. Defaults to 4.
83
  max_chars (int, optional): The maximum number of characters for each line. Defaults to terminal width - 10.
84
  """
85
- max_chars = max_chars or shutil.get_terminal_size()[0] - 10 # Get terminal size if max_chars not set
 
 
86
  indent_str = " " * indent
87
  indent_delta_str = " " * indent_delta
88
 
89
  for key, value in d.items():
90
  if isinstance(value, dict):
91
- print(f"{indent_str}{key}:")
92
  print_dict(value, indent=indent + indent_delta, max_chars=max_chars)
93
  else:
94
  # Value is not a dict, print as a string
@@ -97,21 +91,22 @@ def print_dict(d, indent=0, indent_delta=4, max_chars=None):
97
  line_width = max_chars - indent
98
  # Split value by newline characters and handle each line separately
99
  lines = str_value.split("\n")
100
- print(f"{indent_str}{key}:")
101
  for line in lines:
102
  if len(line) + len(indent_str) + indent_delta > line_width:
103
  # Split long lines into multiple lines
104
- print(f"{indent_str}{indent_delta_str}{line[:line_width]}")
105
  for i in range(line_width, len(line), line_width):
106
- print(f"{indent_str}{indent_delta_str}{line[i:i+line_width]}")
 
 
107
  else:
108
- print(f"{indent_str}{indent_delta_str}{line}")
109
  key = "" # Empty the key for lines after the first one
110
 
111
 
112
  def nested_tuple_to_string(nested_tuple: tuple) -> str:
113
- """
114
- Converts a nested tuple to a string, with elements separated by underscores.
115
 
116
  Args:
117
  nested_tuple (tuple): The nested tuple to be converted.
 
1
+ import logging
2
  import re
3
+ import shutil
4
 
5
 
6
  def split_words(s):
7
+ """Splits a string into words based on PascalCase, camelCase, snake_case, kebab-case, and numbers attached to strings.
 
8
 
9
  Args:
10
  s (str): The string to be split.
 
20
  s = re.sub(r"([a-zA-Z])(\d)", r"\1 \2", s)
21
  s = re.sub(r"(\d)([a-zA-Z])", r"\1 \2", s)
22
  # Split the string into words based on spaces
23
+ return s.split()
 
24
 
25
 
26
  def is_camel_case(s):
27
+ """Checks if a string is in camelCase.
 
28
 
29
  Args:
30
  s (str): The string to be checked.
 
36
 
37
 
38
  def is_snake_case(s):
39
+ """Checks if a string is in snake_case.
 
40
 
41
  Args:
42
  s (str): The string to be checked.
 
48
 
49
 
50
  def camel_to_snake_case(s):
51
+ """Converts a string from camelCase to snake_case.
 
52
 
53
  Args:
54
  s (str): The string to be converted.
 
62
  # Ensure there's an underscore before any uppercase letter that's followed by a lowercase letter or digit and comes after a sequence of uppercase letters
63
  s = re.sub(r"([A-Z]+)([A-Z][a-z0-9])", r"\1_\2", s)
64
 
65
+ return s.lower()
 
 
 
 
66
 
67
 
68
  def print_dict(d, indent=0, indent_delta=4, max_chars=None):
69
+ """Prints a dictionary in a formatted manner, taking into account the terminal width.
 
70
 
71
  Args:
72
  d (dict): The dictionary to be printed.
 
74
  indent_delta (int, optional): The amount of spaces to add for each level of indentation. Defaults to 4.
75
  max_chars (int, optional): The maximum number of characters for each line. Defaults to terminal width - 10.
76
  """
77
+ max_chars = (
78
+ max_chars or shutil.get_terminal_size()[0] - 10
79
+ ) # Get terminal size if max_chars not set
80
  indent_str = " " * indent
81
  indent_delta_str = " " * indent_delta
82
 
83
  for key, value in d.items():
84
  if isinstance(value, dict):
85
+ logging.info(f"{indent_str}{key}:")
86
  print_dict(value, indent=indent + indent_delta, max_chars=max_chars)
87
  else:
88
  # Value is not a dict, print as a string
 
91
  line_width = max_chars - indent
92
  # Split value by newline characters and handle each line separately
93
  lines = str_value.split("\n")
94
+ logging.info(f"{indent_str}{key} ({type(value).__name__}):")
95
  for line in lines:
96
  if len(line) + len(indent_str) + indent_delta > line_width:
97
  # Split long lines into multiple lines
98
+ logging.info(f"{indent_str}{indent_delta_str}{line[:line_width]}")
99
  for i in range(line_width, len(line), line_width):
100
+ logging.info(
101
+ f"{indent_str}{indent_delta_str}{line[i:i+line_width]}"
102
+ )
103
  else:
104
+ logging.info(f"{indent_str}{indent_delta_str}{line}")
105
  key = "" # Empty the key for lines after the first one
106
 
107
 
108
  def nested_tuple_to_string(nested_tuple: tuple) -> str:
109
+ """Converts a nested tuple to a string, with elements separated by underscores.
 
110
 
111
  Args:
112
  nested_tuple (tuple): The nested tuple to be converted.