Using ‘special’ Unicode characters is a known way to attack systems. In general, all characters that might not be displayed by IDEs should be flagged. On top of that, I think that all alternative characters that might look like operators should be prohibited in method, variable, and class names (and only allowed within string literals).
Nice reads
/// <summary>
/// Decides whether a single UTF-16 code unit should be reported as a
/// potential vulnerability.
/// </summary>
/// <param name="ch">The code unit to inspect.</param>
/// <returns>
/// <c>true</c> when the Unicode category of <paramref name="ch"/> is one of
/// the suspicious categories and its value lies outside every allowed range;
/// otherwise <c>false</c>.
/// </returns>
/// <remarks>
/// The input is a <see cref="char"/>, so the value handed to the range check
/// never exceeds U+FFFF.
/// </remarks>
internal static bool IsVulnerability(char ch)
{
    var category = char.GetUnicodeCategory(ch);

    // Characters in ordinary categories are never reported.
    if (!IsSuspiciousCategory(category))
    {
        return false;
    }

    // Suspicious category: report it unless it is explicitly allow-listed.
    return !InAllowedRange(ch);
}
/// <summary>
/// Tells whether a Unicode category is one of the categories treated as
/// suspicious: control, format, surrogate, or unassigned.
/// </summary>
/// <param name="cat">The Unicode category to classify.</param>
/// <returns>
/// <c>true</c> for <see cref="UnicodeCategory.Control"/>,
/// <see cref="UnicodeCategory.Format"/>, <see cref="UnicodeCategory.Surrogate"/>
/// and <see cref="UnicodeCategory.OtherNotAssigned"/>; <c>false</c> for every
/// other category.
/// </returns>
private static bool IsSuspiciousCategory(UnicodeCategory cat)
{
    switch (cat)
    {
        case UnicodeCategory.Control:
        case UnicodeCategory.Format:
        case UnicodeCategory.Surrogate:
        case UnicodeCategory.OtherNotAssigned:
            return true;

        default:
            return false;
    }
}
/// <summary>
/// Checks whether a code point belongs to one of the allow-listed ranges
/// declared in this type (Arabic-related ranges first, then CJK-related ones).
/// </summary>
/// <param name="unicode">The code point value to look up.</param>
/// <returns>
/// <c>true</c> as soon as any allow-listed range reports that it contains
/// <paramref name="unicode"/>; <c>false</c> when none does.
/// </returns>
private static bool InAllowedRange(int unicode)
{
    // Arabic script blocks.
    if (Arabic.Contains(unicode)
        || ArabicSupplement.Contains(unicode)
        || ArabicExtendedB.Contains(unicode)
        || ArabicExtendedA.Contains(unicode)
        || ArabicPresentationFormsA.Contains(unicode)
        || ArabicPresentationFormsB.Contains(unicode))
    {
        return true;
    }

    // Numeral and mathematical symbol ranges listed alongside the Arabic ones.
    if (RumiNumeralSymbols.Contains(unicode)
        || IndicSiyaqNumbers.Contains(unicode)
        || OttomanSiyaqNumbers.Contains(unicode)
        || ArabicMathematicalAlphabeticSymbols.Contains(unicode))
    {
        return true;
    }

    // CJK unified ideographs and their extensions.
    if (CJKUnifiedIdeographs.Contains(unicode)
        || CJKUnifiedIdeographsExtensionA.Contains(unicode)
        || CJKUnifiedIdeographsExtensionB.Contains(unicode)
        || CJKUnifiedIdeographsExtensionC.Contains(unicode)
        || CJKUnifiedIdeographsExtensionD.Contains(unicode)
        || CJKUnifiedIdeographsExtensionE.Contains(unicode)
        || CJKUnifiedIdeographsExtensionF.Contains(unicode))
    {
        return true;
    }

    // Last entry in the allow-list.
    return CJKBlockSymbolsAndPunctuation.Contains(unicode);
}
// Allow-listed code point ranges. Each pair is (first, last) as passed to the
// project's Range type; presumably both ends are inclusive — confirm against
// Range.Contains.

// Arabic script blocks.
// NOTE(review): this range includes U+061C (Arabic Letter Mark), an invisible
// format character — confirm that allowing it is intended.
private static readonly Range Arabic = new Range(0x00600, 0x006FF);
private static readonly Range ArabicSupplement = new Range(0x00750, 0x0077F);
private static readonly Range ArabicExtendedB = new Range(0x00870, 0x0089F);
private static readonly Range ArabicExtendedA = new Range(0x008A0, 0x008FF);
private static readonly Range ArabicPresentationFormsA = new Range(0x0FB50, 0x0FDFF);
// NOTE(review): the last value, U+FEFF, is the zero width no-break space / byte
// order mark, an invisible format character — confirm that allowing it is intended.
private static readonly Range ArabicPresentationFormsB = new Range(0x0FE70, 0x0FEFF);

// Numeral and mathematical symbol blocks above U+FFFF.
private static readonly Range RumiNumeralSymbols = new Range(0x10E60, 0x10E7F);
private static readonly Range IndicSiyaqNumbers = new Range(0x1EC70, 0x1ECBF);
private static readonly Range OttomanSiyaqNumbers = new Range(0x1ED00, 0x1ED4F);
private static readonly Range ArabicMathematicalAlphabeticSymbols = new Range(0x1EE00, 0x1EEFF);

// CJK unified ideographs. The main range ends at the block boundary U+9FFF
// (previously 0x9FEF), matching the extension ranges below, which all span
// their full Unicode block. This keeps ideographs in U+9FF0..U+9FFF allow-listed
// even when the runtime's Unicode data still reports them as unassigned.
private static readonly Range CJKUnifiedIdeographs = new Range(0x04E00, 0x09FFF);
private static readonly Range CJKUnifiedIdeographsExtensionA = new Range(0x03400, 0x04DBF);
private static readonly Range CJKUnifiedIdeographsExtensionB = new Range(0x20000, 0x2A6DF);
private static readonly Range CJKUnifiedIdeographsExtensionC = new Range(0x2A700, 0x2B73F);
private static readonly Range CJKUnifiedIdeographsExtensionD = new Range(0x2B740, 0x2B81F);
private static readonly Range CJKUnifiedIdeographsExtensionE = new Range(0x2B820, 0x2CEAF);
private static readonly Range CJKUnifiedIdeographsExtensionF = new Range(0x2CEB0, 0x2EBEF);

// Covers only the single code point U+3007 (ideographic number zero), not the
// whole CJK Symbols and Punctuation block.
private static readonly Range CJKBlockSymbolsAndPunctuation = new Range(0x03007, 0x03007);
Obviously, a special case has to be made for \r, which is allowed when combined with \n (end of line), but no exception should be made for the rest. Variable names might be best restricted to [A-Z0-9_]+.