Hatena::Groupsubtech

ういはるかぜの化学

Wednesday, November 05, 2008

XMLDocument/XDocument などに XHTML をつっこむと 実体参照が残念なことになる 15:37  XMLDocument/XDocument などに XHTML をつっこむと 実体参照が残念なことになる - ういはるかぜの化学 を含むブックマーク はてなブックマーク -  XMLDocument/XDocument などに XHTML をつっこむと 実体参照が残念なことになる - ういはるかぜの化学

HTMLで定義されている実体参照を数値文字参照に置き換えるやつをいつも書いている気がするので。

    /// <summary>
    /// Rewrites HTML named character entity references (for example <c>&amp;nbsp;</c>)
    /// as numeric character references (for example <c>&amp;#160;</c>).
    /// </summary>
    static class Utility
    {
        // Exact (case-sensitive) entity name -> numeric character reference.
        // Entity names that differ only by case (Agrave/agrave, Prime/prime, ...)
        // denote different characters, so this table must use an ordinal comparer.
        private static readonly Dictionary<String, String> _characterEntityReferences = new Dictionary<string, string>(StringComparer.Ordinal);

        // Case-insensitive fallback used only when the exact lookup fails. When several
        // names differ only by case, the one registered last wins.
        private static readonly Dictionary<String, String> _caseInsensitiveReferences = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);

        // Matches any "&name;" reference; whether the name is known is decided by lookup.
        private static readonly Regex _matcher = new Regex("&([A-Za-z][A-Za-z0-9]*);");

        static Utility()
        {
            Register("nbsp", "&#160;");
            Register("iexcl", "&#161;");
            Register("cent", "&#162;");
            Register("pound", "&#163;");
            Register("curren", "&#164;");
            Register("yen", "&#165;");
            Register("brvbar", "&#166;");
            Register("sect", "&#167;");
            Register("uml", "&#168;");
            Register("copy", "&#169;");
            Register("ordf", "&#170;");
            Register("laquo", "&#171;");
            Register("not", "&#172;");
            Register("shy", "&#173;");
            Register("reg", "&#174;");
            Register("macr", "&#175;");
            Register("deg", "&#176;");
            Register("plusmn", "&#177;");
            Register("sup2", "&#178;");
            Register("sup3", "&#179;");
            Register("acute", "&#180;");
            Register("micro", "&#181;");
            Register("para", "&#182;");
            Register("middot", "&#183;");
            Register("cedil", "&#184;");
            Register("sup1", "&#185;");
            Register("ordm", "&#186;");
            Register("raquo", "&#187;");
            Register("frac14", "&#188;");
            Register("frac12", "&#189;");
            Register("frac34", "&#190;");
            Register("iquest", "&#191;");
            Register("Agrave", "&#192;");
            Register("Aacute", "&#193;");
            Register("Acirc", "&#194;");
            Register("Atilde", "&#195;");
            Register("Auml", "&#196;");
            Register("Aring", "&#197;");
            Register("AElig", "&#198;");
            Register("Ccedil", "&#199;");
            Register("Egrave", "&#200;");
            Register("Eacute", "&#201;");
            Register("Ecirc", "&#202;");
            Register("Euml", "&#203;");
            Register("Igrave", "&#204;");
            Register("Iacute", "&#205;");
            Register("Icirc", "&#206;");
            Register("Iuml", "&#207;");
            Register("ETH", "&#208;");
            Register("Ntilde", "&#209;");
            Register("Ograve", "&#210;");
            Register("Oacute", "&#211;");
            Register("Ocirc", "&#212;");
            Register("Otilde", "&#213;");
            Register("Ouml", "&#214;");
            Register("times", "&#215;");
            Register("Oslash", "&#216;");
            Register("Ugrave", "&#217;");
            Register("Uacute", "&#218;");
            Register("Ucirc", "&#219;");
            Register("Uuml", "&#220;");
            Register("Yacute", "&#221;");
            Register("THORN", "&#222;");
            Register("szlig", "&#223;");
            Register("agrave", "&#224;");
            Register("aacute", "&#225;");
            Register("acirc", "&#226;");
            Register("atilde", "&#227;");
            Register("auml", "&#228;");
            Register("aring", "&#229;");
            Register("aelig", "&#230;");
            Register("ccedil", "&#231;");
            Register("egrave", "&#232;");
            Register("eacute", "&#233;");
            Register("ecirc", "&#234;");
            Register("euml", "&#235;");
            Register("igrave", "&#236;");
            Register("iacute", "&#237;");
            Register("icirc", "&#238;");
            Register("iuml", "&#239;");
            Register("eth", "&#240;");
            Register("ntilde", "&#241;");
            Register("ograve", "&#242;");
            Register("oacute", "&#243;");
            Register("ocirc", "&#244;");
            Register("otilde", "&#245;");
            Register("ouml", "&#246;");
            Register("divide", "&#247;");
            Register("oslash", "&#248;");
            Register("ugrave", "&#249;");
            Register("uacute", "&#250;");
            Register("ucirc", "&#251;");
            Register("uuml", "&#252;");
            Register("yacute", "&#253;");
            Register("thorn", "&#254;");
            Register("yuml", "&#255;");
            Register("fnof", "&#402;");
            Register("Alpha", "&#913;");
            Register("Beta", "&#914;");
            Register("Gamma", "&#915;");
            Register("Delta", "&#916;");
            Register("Epsilon", "&#917;");
            Register("Zeta", "&#918;");
            Register("Eta", "&#919;");
            Register("Theta", "&#920;");
            Register("Iota", "&#921;");
            Register("Kappa", "&#922;");
            Register("Lambda", "&#923;");
            Register("Mu", "&#924;");
            Register("Nu", "&#925;");
            Register("Xi", "&#926;");
            Register("Omicron", "&#927;");
            Register("Pi", "&#928;");
            Register("Rho", "&#929;");
            Register("Sigma", "&#931;");
            Register("Tau", "&#932;");
            Register("Upsilon", "&#933;");
            Register("Phi", "&#934;");
            Register("Chi", "&#935;");
            Register("Psi", "&#936;");
            Register("Omega", "&#937;");
            Register("alpha", "&#945;");
            Register("beta", "&#946;");
            Register("gamma", "&#947;");
            Register("delta", "&#948;");
            Register("epsilon", "&#949;");
            Register("zeta", "&#950;");
            Register("eta", "&#951;");
            Register("theta", "&#952;");
            Register("iota", "&#953;");
            Register("kappa", "&#954;");
            Register("lambda", "&#955;");
            Register("mu", "&#956;");
            Register("nu", "&#957;");
            Register("xi", "&#958;");
            Register("omicron", "&#959;");
            Register("pi", "&#960;");
            Register("rho", "&#961;");
            Register("sigmaf", "&#962;");
            Register("sigma", "&#963;");
            Register("tau", "&#964;");
            Register("upsilon", "&#965;");
            Register("phi", "&#966;");
            Register("chi", "&#967;");
            Register("psi", "&#968;");
            Register("omega", "&#969;");
            Register("thetasym", "&#977;");
            Register("upsih", "&#978;");
            Register("piv", "&#982;");
            Register("bull", "&#8226;");
            Register("hellip", "&#8230;");
            Register("prime", "&#8242;");
            Register("Prime", "&#8243;");
            Register("oline", "&#8254;");
            Register("frasl", "&#8260;");
            Register("weierp", "&#8472;");
            Register("image", "&#8465;");
            Register("real", "&#8476;");
            Register("trade", "&#8482;");
            Register("alefsym", "&#8501;");
            Register("larr", "&#8592;");
            Register("uarr", "&#8593;");
            Register("rarr", "&#8594;");
            Register("darr", "&#8595;");
            Register("harr", "&#8596;");
            Register("crarr", "&#8629;");
            Register("lArr", "&#8656;");
            Register("uArr", "&#8657;");
            Register("rArr", "&#8658;");
            Register("dArr", "&#8659;");
            Register("hArr", "&#8660;");
            Register("forall", "&#8704;");
            Register("part", "&#8706;");
            Register("exist", "&#8707;");
            Register("empty", "&#8709;");
            Register("nabla", "&#8711;");
            Register("isin", "&#8712;");
            Register("notin", "&#8713;");
            Register("ni", "&#8715;");
            Register("prod", "&#8719;");
            Register("sum", "&#8721;");
            Register("minus", "&#8722;");
            Register("lowast", "&#8727;");
            Register("radic", "&#8730;");
            Register("prop", "&#8733;");
            Register("infin", "&#8734;");
            Register("ang", "&#8736;");
            Register("and", "&#8743;");
            Register("or", "&#8744;");
            Register("cap", "&#8745;");
            Register("cup", "&#8746;");
            Register("int", "&#8747;");
            Register("there4", "&#8756;");
            Register("sim", "&#8764;");
            Register("cong", "&#8773;");
            Register("asymp", "&#8776;");
            Register("ne", "&#8800;");
            Register("equiv", "&#8801;");
            Register("le", "&#8804;");
            Register("ge", "&#8805;");
            Register("sub", "&#8834;");
            Register("sup", "&#8835;");
            Register("nsub", "&#8836;");
            Register("sube", "&#8838;");
            Register("supe", "&#8839;");
            Register("oplus", "&#8853;");
            Register("otimes", "&#8855;");
            Register("perp", "&#8869;");
            Register("sdot", "&#8901;");
            Register("lceil", "&#8968;");
            Register("rceil", "&#8969;");
            Register("lfloor", "&#8970;");
            Register("rfloor", "&#8971;");
            Register("lang", "&#9001;");
            Register("rang", "&#9002;");
            Register("loz", "&#9674;");
            Register("spades", "&#9824;");
            Register("clubs", "&#9827;");
            Register("hearts", "&#9829;");
            Register("diams", "&#9830;");
            // quot, amp, lt and gt are deliberately not registered: they are among the
            // entities XML itself predefines, so references to them are left untouched.
            Register("OElig", "&#338;");
            Register("oelig", "&#339;");
            Register("Scaron", "&#352;");
            Register("scaron", "&#353;");
            Register("Yuml", "&#376;");
            Register("circ", "&#710;");
            Register("tilde", "&#732;");
            Register("ensp", "&#8194;");
            Register("emsp", "&#8195;");
            Register("thinsp", "&#8201;");
            Register("zwnj", "&#8204;");
            Register("zwj", "&#8205;");
            Register("lrm", "&#8206;");
            Register("rlm", "&#8207;");
            Register("ndash", "&#8211;");
            Register("mdash", "&#8212;");
            Register("lsquo", "&#8216;");
            Register("rsquo", "&#8217;");
            Register("sbquo", "&#8218;");
            Register("ldquo", "&#8220;");
            Register("rdquo", "&#8221;");
            Register("bdquo", "&#8222;");
            Register("dagger", "&#8224;");
            Register("Dagger", "&#8225;");
            Register("permil", "&#8240;");
            Register("lsaquo", "&#8249;");
            Register("rsaquo", "&#8250;");
            Register("euro", "&#8364;");
        }

        // Records one entity in both lookup tables. The fallback table is overwritten
        // on purpose so that, among names differing only by case, the last one wins.
        private static void Register(String name, String numericReference)
        {
            _characterEntityReferences[name] = numericReference;
            _caseInsensitiveReferences[name] = numericReference;
        }

        // Returns the numeric reference for a matched "&name;", or the matched text
        // unchanged when the name is not in either table.
        private static String ReplaceEntity(Match match)
        {
            String name = match.Groups[1].Value;
            String numericReference;
            if (_characterEntityReferences.TryGetValue(name, out numericReference)
                || _caseInsensitiveReferences.TryGetValue(name, out numericReference))
            {
                return numericReference;
            }
            return match.Value;
        }

        /// <summary>
        /// Replaces every registered named character entity reference in
        /// <paramref name="html"/> with its numeric character reference.
        /// </summary>
        /// <param name="html">Markup text to process.</param>
        /// <returns>
        /// The text with registered references rewritten. A name is looked up
        /// case-sensitively first and case-insensitively as a fallback; references
        /// whose name is not registered (including amp, lt, gt and quot) are
        /// returned unchanged.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="html"/> is null.</exception>
        public static String ResolveCharacterEntityReferences(String html)
        {
            if (html == null)
            {
                throw new ArgumentNullException("html");
            }
            return _matcher.Replace(html, new MatchEvaluator(ReplaceEntity));
        }
    }
トラックバック - http://subtech.g.hatena.ne.jp/mayuki/20081105