483 lines
16 KiB
Python
483 lines
16 KiB
Python
import logging
|
|
import re
|
|
from abc import ABC, abstractmethod
|
|
from typing import Callable, Optional, Union
|
|
|
|
from talon import Context, Module, actions, app, registry
|
|
from talon.grammar import Phrase
|
|
|
|
|
|
class Formatter(ABC):
    """Abstract base class for text formatters.

    Each formatter carries a string ``id`` and must implement both
    ``format`` and ``unformat``.
    """

    def __init__(self, id: str):
        # Identifier of this formatter instance.
        self.id = id

    @abstractmethod
    def format(self, text: str) -> str:
        """Return ``text`` with this formatter applied."""

    @abstractmethod
    def unformat(self, text: str) -> str:
        """Return ``text`` with this formatter's formatting undone."""
|
|
|
|
|
|
class CustomFormatter(Formatter):
    """Formatter whose behavior is supplied as plain callables.

    ``format`` is required; ``unformat`` is optional and, when omitted,
    unformatting leaves the text unchanged.
    """

    def __init__(
        self,
        id: str,
        format: Callable[[str], str],
        unformat: Optional[Callable[[str], str]] = None,
    ):
        super().__init__(id)
        self._format = format
        self._unformat = unformat

    def format(self, text: str) -> str:
        """Run the wrapped format callable on ``text``."""
        return self._format(text)

    def unformat(self, text: str) -> str:
        """Run the wrapped unformat callable, or return ``text`` as is if there is none."""
        undo = self._unformat
        return undo(text) if undo else text
|
|
|
|
|
|
class CodeFormatter(Formatter):
    """Formatter that joins words with a delimiter.

    The first word of each group is transformed with ``format_first`` and
    every following word with ``format_rest``.
    """

    def __init__(
        self,
        id: str,
        delimiter: str,
        format_first: Callable[[str], str],
        format_rest: Callable[[str], str],
    ):
        super().__init__(id)
        self._delimiter = delimiter
        self._format_first = format_first
        self._format_rest = format_rest

    def format(self, text: str) -> str:
        """Format ``text`` using this instance's delimiter and word transforms."""
        return self._format_delim(
            text, self._delimiter, self._format_first, self._format_rest
        )

    def unformat(self, text: str) -> str:
        """Undo code formatting by delegating to ``remove_code_formatting``."""
        return remove_code_formatting(text)

    def _format_delim(
        self,
        text: str,
        delimiter: str,
        format_first: Callable[[str], str],
        format_rest: Callable[[str], str],
    ) -> str:
        """Split ``text`` into tokens and rebuild it as delimiter-joined groups.

        Symbol tokens (anything that is neither whitespace, numeric nor purely
        alphabetic) close the current group and are emitted on their own.
        """
        # Drop every character that is not alpha-num, whitespace, dot or comma.
        cleaned = re.sub(r"[^\w\d\s.,]+", "", text)
        # Tokenize on non alpha-num runs; the capture group keeps the separators.
        tokens = re.split(r"([^\w\d]+)", cleaned)
        pieces = []
        pending = []
        at_group_start = True

        for token in tokens:
            if token.isspace():
                continue

            if token.isnumeric():
                # Numbers are kept verbatim and restart the "first word" state.
                at_group_start = True
                pending.append(token)
                continue

            if not token.isalpha():
                # Symbol: flush the current group, then emit the symbol itself.
                pieces.append(delimiter.join(pending))
                pending = []
                symbol = token.strip()
                # A dot stays glued to what follows; other symbols get a space.
                pieces.append(symbol if symbol == "." else symbol + " ")
                at_group_start = True
                continue

            if at_group_start:
                at_group_start = False
                transform = format_first
            else:
                transform = format_rest
            pending.append(transform(token) if transform else token)

        pieces.append(delimiter.join(pending))
        return "".join(pieces)
|
|
|
|
|
|
class TitleFormatter(Formatter):
    """Formatter that title-cases text.

    Short function words listed in ``_words_to_keep_lowercase`` stay lower
    case unless they are the first or last word, or follow a symbol.
    """

    _words_to_keep_lowercase = (
        "a an and as at but by en for if in nor of on or per the to v via vs".split()
    )

    def format(self, text: str) -> str:
        """Title-case ``text`` while preserving its original whitespace."""
        # The capture group keeps the whitespace runs so they can be re-joined.
        words = [x for x in re.split(r"(\s+)", text) if x]
        words = self._title_case_words(words)
        return "".join(words)

    def unformat(self, text: str) -> str:
        """Lower-case ``text`` if it is entirely upper case (see ``unformat_upper``)."""
        return unformat_upper(text)

    def _title_case_word(
        self, word: str, is_first: bool, is_last: bool, following_symbol: bool
    ) -> str:
        """Return the title-cased form of a single word.

        Words that are not all lower case are returned unchanged, as are
        keep-lowercase words in the middle of the text. Hyphenated words are
        title-cased part by part.
        """
        if not word.islower() or (
            word in self._words_to_keep_lowercase
            and not is_first
            and not is_last
            and not following_symbol
        ):
            return word

        if "-" in word:
            words = word.split("-")
            words = self._title_case_words(words)
            return "-".join(words)

        return word.capitalize()

    def _title_case_words(self, words: list[str]) -> list[str]:
        """Title-case ``words`` in place and return the same list.

        Whitespace tokens and empty fragments are left untouched.
        """
        following_symbol = False
        for i, word in enumerate(words):
            # Splitting on "-" yields empty fragments for leading, trailing or
            # doubled hyphens ("foo-", "a--b"). Skip them: there is nothing to
            # capitalize and indexing word[-1] below would raise IndexError.
            if not word or word.isspace():
                continue
            is_first = i == 0
            is_last = i == len(words) - 1
            words[i] = self._title_case_word(word, is_first, is_last, following_symbol)
            following_symbol = not word[-1].isalnum()
        return words
|
|
|
|
|
|
class CapitalizeFormatter(Formatter):
    """Formatter that upper-cases the first character of the first word."""

    def format(self, text: str) -> str:
        """Capitalize the first word of ``text``, keeping leading whitespace."""

        def upper_initial(match):
            # The match covers optional leading whitespace plus the first word.
            return capitalize_first(match.group())

        return re.sub(r"^\s*\S+", upper_initial, text)

    def unformat(self, text: str) -> str:
        """Lower-case ``text`` if it is entirely upper case (see ``unformat_upper``)."""
        return unformat_upper(text)
|
|
|
|
|
|
class SentenceFormatter(Formatter):
    """Formatter that capitalizes the first word when that word is all lower case."""

    def format(self, text: str) -> str:
        """Capitalize first word if it's already all lower case"""
        # Keep whitespace runs as tokens so the text can be re-joined unchanged.
        tokens = [tok for tok in re.split(r"(\s+)", text) if tok]
        for index, token in enumerate(tokens):
            if token.isspace():
                continue
            if token.islower():
                tokens[index] = token.capitalize()
            # Only the first non-whitespace token is ever considered.
            break
        return "".join(tokens)

    def unformat(self, text: str) -> str:
        """Lower-case ``text`` if it is entirely upper case (see ``unformat_upper``)."""
        return unformat_upper(text)
|
|
|
|
|
|
def capitalize_first(text: str) -> str:
    """Upper-case the first non-whitespace character of ``text``.

    Leading whitespace and the rest of the text are left unchanged.
    """
    body = text.lstrip()
    # Number of leading whitespace characters removed by lstrip().
    lead = len(text) - len(body)
    return f"{text[:lead]}{body[:1].upper()}{body[1:]}"
|
|
|
|
|
|
def capitalize(text: str) -> str:
    """Return ``text`` with its first character upper-cased and the rest lower-cased."""
    capitalized = text.capitalize()
    return capitalized
|
|
|
|
|
|
def lower(text: str) -> str:
    """Return ``text`` converted to lower case."""
    lowered = text.lower()
    return lowered
|
|
|
|
|
|
def unformat_upper(text: str) -> str:
    """Lower-case ``text`` only when it is entirely upper case; otherwise return it unchanged."""
    if text.isupper():
        return text.lower()
    return text
|
|
|
|
|
|
def remove_code_formatting(text: str) -> str:
    """Remove format from text

    Delimiter runs (``-_.:/``) and camelCase boundaries are turned into
    spaces. If that changed anything the result is lower-cased; otherwise
    the input is returned untouched.
    """
    # Replace delimiter runs with a single space.
    spaced = re.sub(r"[-_.:/]+", " ", text)
    # Break up camelCase, including letter/digit boundaries.
    spaced = de_camel(spaced)
    if spaced == text:
        # Nothing was split, so there is no formatting to undo.
        return text
    # Splitting succeeded: lower case to restore the "original" text.
    return spaced.lower()
|
|
|
|
|
|
def de_camel(text: str) -> str:
    """Replacing camelCase boundaries with blank space

    A space is inserted at four kinds of zero-width boundaries:
    lower-to-upper ("camel|Case"), the last capital of an upper-case run
    that starts a new word ("IP|Address"), letter-to-digit ("version|10")
    and digit-to-letter ("2|x").
    """
    lower_cls = "a-zåäö"
    upper_cls = "A-ZÅÄÖ"
    letter_cls = f"{lower_cls}{upper_cls}"
    low_to_upper = rf"(?<=[{lower_cls}])(?=[{upper_cls}])"  # camel|Case
    # The look-behind previously read "[L{Lu}]": a stray literal "L" (already
    # covered by A-Z) left over from a mistyped interpolation. Matches are unchanged.
    upper_to_last_upper = rf"(?<=[{upper_cls}])(?=[{upper_cls}][{lower_cls}])"  # IP|Address
    letter_to_digit = rf"(?<=[{letter_cls}])(?=[\d])"  # version|10
    digit_to_letter = rf"(?<=[\d])(?=[{letter_cls}])"  # 2|x
    return re.sub(
        rf"{low_to_upper}|{upper_to_last_upper}|{letter_to_digit}|{digit_to_letter}",
        " ",
        text,
    )
|
|
|
|
|
|
# Every available formatter, in declaration order.
formatter_list = [
    # Formatters defined by a simple text -> text callable.
    CustomFormatter("NOOP", lambda s: s),
    CustomFormatter("TRAILING_SPACE", lambda s: f"{s} "),
    CustomFormatter("DOUBLE_QUOTED_STRING", lambda s: f'"{s}"'),
    CustomFormatter("SINGLE_QUOTED_STRING", lambda s: f"'{s}'"),
    CustomFormatter("SPACE_SURROUNDED_STRING", lambda s: f" {s} "),
    CustomFormatter("ALL_CAPS", lambda s: s.upper()),
    CustomFormatter("ALL_LOWERCASE", lambda s: s.lower()),
    CustomFormatter("COMMA_SEPARATED", lambda s: re.sub(r"\s+", ", ", s)),
    CustomFormatter("REMOVE_FORMATTING", remove_code_formatting),
    # Capitalization formatters.
    TitleFormatter("CAPITALIZE_ALL_WORDS"),
    # `CAPITALIZE_FIRST_WORD` is a slight misnomer for the sentence formatter;
    # the name is kept for backward compatibility.
    SentenceFormatter("CAPITALIZE_FIRST_WORD"),
    # This one really does just capitalize the first word.
    CapitalizeFormatter("CAPITALIZE"),
    # Delimiter-based code formatters: (id, delimiter, first word, other words).
    CodeFormatter("NO_SPACES", "", lower, lower),
    CodeFormatter("PRIVATE_CAMEL_CASE", "", lower, capitalize),
    CodeFormatter("PUBLIC_CAMEL_CASE", "", capitalize, capitalize),
    CodeFormatter("SNAKE_CASE", "_", lower, lower),
    CodeFormatter("DASH_SEPARATED", "-", lower, lower),
    CodeFormatter("DOT_SEPARATED", ".", lower, lower),
    CodeFormatter("SLASH_SEPARATED", "/", lower, lower),
    CodeFormatter("ALL_SLASHES", "/", lambda s: f"/{s.lower()}", lower),
    CodeFormatter("DOUBLE_UNDERSCORE", "__", lower, lower),
    CodeFormatter("DOUBLE_COLON_SEPARATED", "::", lower, lower),
]

# Lookup table from formatter id to formatter instance.
formatters_dict = {formatter.id: formatter for formatter in formatter_list}
|
|
|
|
mod = Module()

# Lists declared by this module.
mod.list(
    "reformatter",
    desc="list of all reformatters",
)
mod.list(
    "code_formatter",
    desc="list of formatters typically applied to code",
)
mod.list(
    "prose_formatter",
    desc="list of prose formatters (words to start dictating prose)",
)
mod.list(
    "word_formatter",
    desc="List of word formatters",
)

# Most recent phrase handled by format_phrase(): first as received, then as
# formatted. Both are consulted when reformatting the last phrase.
last_phrase: Union[str, Phrase] = ""
last_phrase_formatted: str = ""
|
|
|
|
|
|
def format_phrase(
    m: Union[str, Phrase], formatters: str, unformat: bool = False
) -> str:
    """Format ``m`` with ``formatters`` and record the result in the phrase history.

    ``m`` is stored in ``last_phrase`` and the formatted text in
    ``last_phrase_formatted``. A non-string ``m`` is first turned into text
    via ``actions.dictate.parse_words`` / ``replace_words``.
    """
    global last_phrase, last_phrase_formatted
    last_phrase = m

    if not isinstance(m, str):
        parsed = actions.dictate.parse_words(m)
        replaced = actions.dictate.replace_words(parsed)
        text = " ".join(replaced)
    else:
        text = m

    formatted = format_text_without_adding_to_history(text, formatters, unformat)
    last_phrase_formatted = formatted

    # Arguably, we shouldn't be dealing with history here, but somewhere later
    # down the line. But we have a bunch of code that relies on doing it this
    # way and I don't feel like rewriting it just now. -rntz, 2020-11-04
    actions.user.add_phrase_to_history(formatted)
    return formatted
|
|
|
|
|
|
def format_text_without_adding_to_history(
    text: str, formatters: str, unformat: bool = False
) -> str:
    """Formats a text according to formatters. formatters is a comma-separated string of formatters (e.g. 'TITLE_CASE,SNAKE_CASE')"""
    if not text:
        return text

    # Format only the inside of a quoted string; the quotes are re-attached below.
    inner, opening, closing = shrink_to_string_inside(text)

    # Formatters are applied from the last listed to the first.
    names = formatters.split(",")[::-1]

    if unformat:
        # Only the first formatter to be applied gets to unformat the text.
        inner = formatters_dict[names[0]].unformat(inner)

    for name in names:
        inner = formatters_dict[name].format(inner)

    return f"{opening}{inner}{closing}"
|
|
|
|
|
|
# Recognized (opening, closing) string delimiter pairs. Order matters: the
# triple quote must be tried before the single double quote.
string_delimiters = [
    ['"""', '"""'],
    ['"', '"'],
    ["'", "'"],
]


def shrink_to_string_inside(text: str) -> tuple[str, str, str]:
    """Split a quoted ``text`` into its content and surrounding delimiters.

    Returns ``(inner, left, right)`` such that ``left + inner + right == text``.
    When ``text`` is not wrapped in one of ``string_delimiters`` the result is
    ``(text, "", "")``.
    """
    for left, right in string_delimiters:
        # The text must be long enough to hold both delimiters. Without this
        # check a lone '"' (or '"""') matches startswith and endswith on the
        # very same characters, and re-joining would duplicate the quotes.
        if (
            len(text) >= len(left) + len(right)
            and text.startswith(left)
            and text.endswith(right)
        ):
            return text[len(left) : -len(right)], left, right
    return text, "", ""
|
|
|
|
|
|
@mod.capture(
    rule="({user.code_formatter} | {user.prose_formatter} | {user.reformatter})+"
)
def formatters(m) -> str:
    """Returns a comma-separated string of formatters e.g. 'SNAKE,DUBSTRING'"""
    # Each captured item is a formatter name; join them in spoken order.
    names = [name for name in m]
    return ",".join(names)
|
|
|
|
|
|
@mod.capture(rule="{self.code_formatter}+")
def code_formatters(m) -> str:
    """Returns a comma-separated string of code formatters e.g. 'SNAKE,DUBSTRING'"""
    names = m.code_formatter_list
    return ",".join(names)
|
|
|
|
|
|
@mod.capture(
    rule="<self.formatters> <user.text> (<user.text> | <user.formatter_immune>)*"
)
def format_text(m) -> str:
    """Formats text and returns a string"""
    formatter_names = m[0]
    # Immune chunks pass through verbatim; everything else is formatted.
    pieces = [
        chunk.string
        if isinstance(chunk, ImmuneString)
        else format_phrase(chunk, formatter_names)
        for chunk in m[1:]
    ]
    return "".join(pieces)
|
|
|
|
|
|
@mod.capture(rule="<user.code_formatters> <user.text>")
def format_code(m) -> str:
    """Formats code and returns a string"""
    formatted = format_phrase(m.text, m.code_formatters)
    return formatted
|
|
|
|
|
|
class ImmuneString:
    """Wrapper that makes a string immune from formatting."""

    def __init__(self, string: str) -> None:
        # The wrapped text, exposed unchanged through the ``string`` attribute.
        self.string = string
|
|
|
|
|
|
@mod.capture(
    # Add anything else into this that you want to have inserted when
    # using a prose formatter.
    rule="(<user.symbol_key> | (numb | numeral) <number>)"
)
def formatter_immune(m) -> ImmuneString:
    """Symbols and numbers that can be interspersed into a prose formatter
    (i.e., not dictated immediately after the name of the formatter)

    They will be inserted directly, without being formatted.

    """
    # Prefer the captured number when there is one, else the first captured item.
    value = m.number if hasattr(m, "number") else m[0]
    return ImmuneString(str(value))
|
|
|
|
|
|
def get_formatters_and_prose_formatters(
    include_reformatters: bool,
) -> tuple[dict[str, str], dict[str, str]]:
    """Returns dictionary of non-word formatters and a dictionary of all prose formatters"""
    # Later lists override earlier ones when they share a key.
    list_names = ["user.code_formatter", "user.prose_formatter"]
    if include_reformatters:
        list_names.append("user.reformatter")

    formatters = {}
    for list_name in list_names:
        formatters.update(actions.user.talon_get_active_registry_list(list_name))

    prose_formatters = {}
    prose_formatters.update(
        actions.user.talon_get_active_registry_list("user.prose_formatter")
    )
    return formatters, prose_formatters
|
|
|
|
|
|
@mod.action_class
class Actions:
    def formatted_text(phrase: Union[str, Phrase], formatters: str) -> str:
        """Formats a phrase according to formatters. formatters is a comma-separated string of formatters (e.g. 'CAPITALIZE_ALL_WORDS,DOUBLE_QUOTED_STRING')"""
        formatted = format_phrase(phrase, formatters)
        return formatted

    def insert_formatted(phrase: Union[str, Phrase], formatters: str):
        """Inserts a phrase formatted according to formatters. Formatters is a comma separated list of formatters (e.g. 'CAPITALIZE_ALL_WORDS,DOUBLE_QUOTED_STRING')"""
        formatted = format_phrase(phrase, formatters)
        actions.insert(formatted)

    def insert_with_history(text: str):
        """Inserts some text, remembering it in the phrase history."""
        actions.user.deprecate_action("2022-12-11", "user.insert_with_history")

        actions.user.add_phrase_to_history(text)
        actions.insert(text)

    def formatters_reformat_last(formatters: str):
        """Clears and reformats last formatted phrase"""
        if actions.user.get_last_phrase() == last_phrase_formatted:
            actions.user.clear_last_phrase()
            actions.user.insert_formatted(last_phrase, formatters)
        else:
            # The last thing we inserted isn't the same as the last thing we
            # formatted, so abort.
            logging.warning(
                "formatters_reformat_last(): Last phrase wasn't a formatter!"
            )

    def reformat_text(text: str, formatters: str) -> str:
        """Re-formats <text> as <formatters>"""
        return format_phrase(text, formatters, unformat=True)

    def formatters_reformat_selection(formatters: str):
        """Reformats the current selection as <formatters>"""
        selected = actions.edit.selected_text()
        if not selected:
            app.notify("Asked to reformat selection, but nothing selected!")
            return
        # Delete separately for compatibility with programs that don't overwrite
        # selected text (e.g. Emacs)
        actions.edit.delete()
        actions.insert(actions.user.reformat_text(selected, formatters))

    def get_formatters_words() -> dict:
        """Returns words currently used as formatters, and a demonstration string using those formatters"""
        formatters, prose_formatters = get_formatters_and_prose_formatters(
            include_reformatters=False
        )

        demos = {}
        for spoken in sorted(formatters):
            # Prose formatters are flagged with a trailing " *".
            label = f"{spoken} *" if spoken in prose_formatters else spoken
            demos[label] = format_text_without_adding_to_history(
                "one two three", formatters[spoken]
            )
        return demos

    def get_reformatters_words() -> dict:
        """Returns words currently used as re-formatters, and a demonstration string using those re-formatters"""
        formatters, prose_formatters = get_formatters_and_prose_formatters(
            include_reformatters=True
        )

        demos = {}
        for spoken in sorted(formatters):
            # Prose formatters are flagged with a trailing " *".
            label = f"{spoken} *" if spoken in prose_formatters else spoken
            demos[label] = format_text_without_adding_to_history(
                "one_two_three", formatters[spoken], True
            )
        return demos

    def insert_many(strings: list[str]) -> None:
        """Insert a list of strings, sequentially."""
        for item in strings:
            actions.insert(item)
|