using System;
using System.Collections.Generic;
using System.Text.RegularExpressions;
/// <summary>
/// ClipboardFusion macro helper that converts plain (typically multi-line) text
/// into an HTML list, one list item per non-empty line.
/// </summary>
/// <remarks>
/// The detection is heuristic: an ordered list is only recognised when the text
/// starts at the beginning of a sequence (a, i, A, I, 0 or 1), and anything that
/// starts with a bullet-like symbol is treated as an unordered list. It also tries
/// to cope with lists pasted out of MS Word. It is not perfect; use at your own
/// discretion.
/// </remarks>
public static class ClipboardFusionHelper
{
    // Indentation written in front of every <li> line of the generated markup.
    private const string ItemIndent = "  ";

    // A single line break in any style. Line breaks are matched explicitly instead of
    // relying on the Regex "$" anchor: with RegexOptions.Multiline "$" matches just
    // before '\n', i.e. between the '\r' and the '\n' of a Windows line break.
    private const string LineBreakPattern = @"\r\n|\r|\n";

    // Characters allowed between a list marker and the item text. Line breaks are
    // excluded so that a marker can never swallow the end of its own line.
    private const string MarkerSeparatorPattern = @"[^\w\r\n]+";

    // An ordered-list marker as it appears on the FIRST line: a, i, A, I, 0 or 1.
    private const string OrderedStartPattern = @"^[aiAI01]" + MarkerSeparatorPattern;

    // Any ordered-list marker (letters/digits followed by a separator) at a line start.
    private const string OrderedMarkerPattern = @"^[a-zA-Z0-9]+" + MarkerSeparatorPattern;

    // A bullet marker at a line start: anything that is not a digit or a letter other
    // than 'o'/'O' (Word's hollow bullet pastes as "o"), followed by a separator.
    // Typical bullets include •, o, §, #, *, -, ->, =, Ø, etc.
    private const string BulletMarkerPattern = @"^[^a-np-zA-NP-Z0-9\r\n]+" + MarkerSeparatorPattern;

    /// <summary>
    /// Converts <paramref name="text"/> into an HTML <c>&lt;ol&gt;</c> or <c>&lt;ul&gt;</c> element.
    /// </summary>
    /// <param name="text">The text to convert; <c>null</c> is treated as an empty string.</param>
    /// <returns>
    /// The list markup. The opening and closing tags are separated from the items by
    /// "\r\n"; the line breaks between items keep the style found in the input.
    /// Item text is emitted as-is and is not HTML-encoded.
    /// </returns>
    public static string ProcessText(string text)
    {
        string listTag = "ul";   // default to an unordered list
        string listStyle = "";   // value for the CSS list-style-type property, if any

        string body = NormalizeLines(text ?? "");

        if (Regex.IsMatch(body, OrderedStartPattern))
        {
            // It looks like some sort of ordered list; the first marker decides the numbering style.
            listTag = "ol";
            switch (body[0])
            {
                case 'a': listStyle = "lower-alpha"; break;
                case 'i': listStyle = "lower-roman"; break;
                case 'A': listStyle = "upper-alpha"; break;
                case 'I': listStyle = "upper-roman"; break;
                case '0': listStyle = "decimal-leading-zero"; break;
                // '1' keeps the browser default (decimal)
            }
            body = StripMarkers(body, OrderedMarkerPattern);
        }
        else if (Regex.IsMatch(body, BulletMarkerPattern))
        {
            // It looks like a bulleted list; the first bullet decides the bullet style.
            switch (body[0])
            {
                case 'o':
                case 'O':
                    listStyle = "circle";
                    break;
                case '[':
                case '■':
                case '▪':
                    listStyle = "square";
                    break;
            }
            body = StripMarkers(body, BulletMarkerPattern);
        }

        // Every remaining line break separates two list items.
        string items = Regex.Replace(body, "(?<linebreak>" + LineBreakPattern + ")", "</li>${linebreak}" + ItemIndent + "<li>");

        string styleAttribute = listStyle.Length > 0
            ? " style=\"list-style-type:" + listStyle + ";\""
            : "";

        return "<" + listTag + styleAttribute + ">\r\n"
            + ItemIndent + "<li>" + items + "</li>\r\n"
            + "</" + listTag + ">";
    }

    // Trims the text and every line in it, collapses runs of spaces/tabs into a single
    // space and removes empty or whitespace-only lines, keeping the first line break of
    // each run so the input's line-break style is preserved.
    private static string NormalizeLines(string text)
    {
        string result = text.Trim();
        result = Regex.Replace(result, @"[ \t]+", " ");
        // One match covers: trailing blanks of a line, its line break, and any blank
        // lines or leading blanks that follow. Only the first line break is kept.
        result = Regex.Replace(result, @"[ \t]*(?<linebreak>" + LineBreakPattern + @")[ \t\r\n]*", "${linebreak}");
        return result;
    }

    // Removes the list marker matched by markerPattern from the start of every line,
    // then drops any line that consisted of nothing but a marker.
    private static string StripMarkers(string text, string markerPattern)
    {
        string stripped = Regex.Replace(text, markerPattern, "", RegexOptions.Multiline);
        return NormalizeLines(stripped);
    }
}