<?php
/**
* The UniSearcher function library. It sets some 'config' variables too. They come first.
**/

// Crude tripwire against SQL-injection probes: abort the request if the URL-decoded query
// string contains "select " anywhere (case-insensitive).
// stripos() returns the match offset, and an offset of 0 (query string *starting* with
// "select ") is falsy, so the result has to be compared against FALSE explicitly.
// QUERY_STRING is not defined when this file is loaded from the command line.
if (stripos(rawurldecode(isset($_SERVER['QUERY_STRING']) ? $_SERVER['QUERY_STRING'] : ''), 'select ') !== FALSE) {
    echo "<H1>FRUSTRATING, ISN'T IT??</H1>"; die();
}

// $G is the request-variable array used by the rest of this library; ExtractGlobals()
// fills it from $_GET and $_POST (POST wins on duplicate names).
$G=array();
ExtractGlobals(); // Slightly safer than $_REQUEST.

// This is meant to work under both php5.6 and php8.2. The old mysql_* extension was removed
// in PHP 7.0, so test for the function itself rather than for one particular PHP version;
// that way every runtime lacking mysql_pconnect() gets the stand-in, not just 8.2 and up.
if (!function_exists('mysql_pconnect')) {
    /**
     * Stand-in for the removed mysql_pconnect(), implemented with mysqli.
     *
     * Note that this opens a regular (non-persistent) mysqli connection.
     *
     * @param string $host     Database host.
     * @param string $username Database user.
     * @param string $password Database password.
     * @return mixed Whatever mysqli_connect() returns (a link on success, FALSE on failure).
     */
    function mysql_pconnect($host, $username, $password) {
        return mysqli_connect($host, $username, $password);
    }
}

// Opening <a> tag for links to the "Unisearch Clipboard": it calls the page's PopIt()
// JavaScript function with an empty argument. Callers append the link text and the closing </a>.
define('CLIPBOARD_LINK', '<a href="Javascript: PopIt(\'\')">');

$hrcolor="cc66cc"; // NOTE(review): presumably the color for <hr> rules; not referenced in this part of the file
$DbConnector="/var/www/sites/localhost/etc/db.php"; // file that dosql() require_once()s before running a query
$maxresults=1000; // the most glyphs to ever show at once on a glyph-list page
$pageblocks=array(); // array(start, end) pairs appended by show_blocks() and read by bgcolor()

// Requested chart page as upper-case hex, left-padded to two digits; "00" when no (or a
// non-scalar) page value was supplied.
$page=hexpad((isset($G['page']) && is_scalar($G['page'])) ? $G['page'] : '', 2);
if (strlen($page) > 3) {
    // DOS attempt: setting $page to be HUUUUUUUUUUUUGE. Drop the request values and fall back
    // to the first page, so that $page is always defined for the functions that read it as a global.
    unset($G['page'], $G['subpage']);
    $page="00";
}
if (isset($G['glyph']) && (!is_scalar($G['glyph']) || strlen($G['glyph']) > 5)) {
    // Same kinda hack.
    unset($G['glyph']);
}

if (isset($G['subpage']) && is_scalar($G['subpage']))
    $subpage=substr($G['subpage'], 0, 1); // should be only one digit long...
else
    unset($subpage);

// hexdec() silently skips anything that isn't a hex digit (hexdec("YerMama") returns 3,754,
// hexdec("ZZ") returns 0), so a range check alone cannot catch garbage. Reject non-hex
// characters explicitly, then apply the range limits. hexdec() never returns a negative number.
if (preg_match('/[^0-9A-F]/', $page) || hexdec($page) > 0xFF) $page="00";           // can't exceed 255
if (isset($subpage) && (preg_match('/[^0-9A-Fa-f]/', (string)$subpage) || hexdec($subpage) > 0xF)) unset($subpage); // can't exceed 15
// NOTE(review): $glyph is not assigned anywhere in this part of the file (the request value
// lives in $G['glyph']); the check only runs if an including script has set it.
if (isset($glyph) && is_scalar($glyph) && hexdec($glyph) > 0xFFFFF) unset($glyph);   // can't exceed 1,048,575

if (!isset($_SERVER["GATEWAY_INTERFACE"]) || $_SERVER["GATEWAY_INTERFACE"]=="") {
    // We're running the parser from cmdline, which is the only time we need to know $DbPacketMax.
    // (GATEWAY_INTERFACE is simply absent from $_SERVER in that case, hence the isset() test.)
    $DbPacketMax=FALSE;
    $res=dosql("show variables like 'max_allowed_packet'");
    // dosql() hands back whatever mysql_query() returned, which is FALSE when the query failed.
    if ($res && mysql_num_rows($res)>0) {
        while ($row=mysql_fetch_row($res)) {
            // Each row is (variable name, value).
            if ($row[0]=="max_allowed_packet") {
                $DbPacketMax=$row[1];
                break;
            }
        }
    }
    if (!$DbPacketMax) { $DbPacketMax=1024*1024; } // We'll default to one meg ("1M" in mysqlspeak)
}

// Names of the request variables that mark a page as a search-results page (see IsSearchPage()).
// 'glyph' counts as a search var because it doesn't lead to the 256-char chart page.
$SearchVars=array('shiftjis', 'unihex', 'unidecimal', 'utf8hex', 'utf8', 'desctext', 'deftext', 'prontext', 'glyph');

// Codepoints of special Unicode control characters. According to the original author these
// are kept for reference only; they could be passed to Codepoint2Utf8() or written as &#N;
// entities to emit the characters directly.
// Bidirectional formatting controls:
define('EMBED_LR',      0x202A);
define('EMBED_RL',      0x202B);
define('POP_DF',        0x202C);
define('OVER_LR',       0x202D);
define('OVER_RL',       0x202E);
// Zero-width characters (0xFEFF doubles as the byte-order mark):
define('ZERO_SPACE',    0x200B);
define('ZERO_NJOINER',  0x200C);
define('ZERO_JOINER',   0x200D);
define('ZERO_NBSP',     0xFEFF);
// Variation selectors 1 through 16 (0xFE00 - 0xFE0F):
define('VARSEL1',       0xFE00);
define('VARSEL2',       0xFE01);
define('VARSEL3',       0xFE02);
define('VARSEL4',       0xFE03);
define('VARSEL5',       0xFE04);
define('VARSEL6',       0xFE05);
define('VARSEL7',       0xFE06);
define('VARSEL8',       0xFE07);
define('VARSEL9',       0xFE08);
define('VARSEL10',      0xFE09);
define('VARSEL11',      0xFE0A);
define('VARSEL12',      0xFE0B);
define('VARSEL13',      0xFE0C);
define('VARSEL14',      0xFE0D);
define('VARSEL15',      0xFE0E);
define('VARSEL16',      0xFE0F);

function dosql($sq) {
    // Executes the SQL query in $sq on the shared connection and returns whatever
    // mysql_query() returned. On failure an HTML error notice is echoed (the result is still
    // returned, so callers must be prepared for a FALSE result).
    //
    // The connector file named by $DbConnector is loaded on first use.
    // NOTE(review): it is presumably what assigns $dblink - confirm against that file.
    global $dblink, $DbConnector;
    require_once($DbConnector);
    $res=mysql_query($sq, $dblink);
    $err=mysql_error($dblink);
    if ($err) {
        // Both the error text and the query can contain user-supplied search text, so they
        // are HTML-escaped before being written into the page.
        echo "<h3>SQL Query Failure!</h3>\n<I>" .
            htmlspecialchars($err, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . ":</I><P>\n" .
            htmlspecialchars($sq, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . "<P>\n";
    }
    return $res;
}

function MakeQueryString() {
    // Rebuilds the current URL (script path plus query string) from the request variables in $G.
    // Values are rawurlencode()d; variables with empty values are dropped by CleanupQueryString().
    // Returns $PHP_SELF untouched when $G is empty; otherwise the path is given a leading '/'
    // if it lacks one.
    global $PHP_SELF, $G;
    if (empty($G)) return $PHP_SELF;
    $out=(substr($PHP_SELF, 0, 1) != '/') ? "/$PHP_SELF" : $PHP_SELF;

    $encoded=array();
    foreach ($G as $k=>$v) {
        // Array-valued parameters ("?page[]=1") cannot be url-encoded as a single value
        // (rawurlencode() raises a TypeError on them under PHP 8), so they are left out.
        if (!is_scalar($v)) continue;
        $encoded[$k]=rawurlencode($v);
    }
    $tmpstr=ImplodeQueryString($encoded);
    if ($tmpstr=='') return $out;
    $out .= CleanupQueryString($tmpstr);
    return $out;
}

function ExplodeQueryString($in) {
    // Parses the part of $in after its first '?' into an associative array of name => value.
    // Names and values are trimmed but otherwise left exactly as found (no url-decoding).
    // Pairs without a name or without a value are skipped. Returns array() if $in has no
    // query string.
    $parsed=array();
    if (preg_match('/.*?\?(.+)/', $in, $query)) {
        $pairs=preg_split('/\&/', trim($query[1]), -1, PREG_SPLIT_NO_EMPTY);
        foreach ($pairs as $pair) {
            // Needs at least one character on each side of the '=' (auto-deletes empty vars).
            if (preg_match('/(.+?)=(.+)/', $pair, $kv)) {
                $parsed[trim($kv[1])]=trim($kv[2]);
            }
        }
    }
    return $parsed;
}

function ImplodeQueryString($vars) {
    // Joins name => value pairs into "?a=1&b=2". Only the query string is returned; the
    // caller prepends the script filename. Nothing is url-encoded here (same as
    // ExplodeQueryString), so rawurlencode() the values first. Returns '' when there is
    // nothing to join.
    if (empty($vars)) return '';
    $pairs=array();
    foreach ($vars as $name=>$value) {
        $pairs[]="$name=$value";
    }
    if (!$pairs) return '';
    return '?' . implode('&', $pairs);
}

function CleanupQueryString($in) {
    // Removes variables from $in that aren't set to anything (in "?a=&b=2&c=", a and c would
    // be removed) and returns the rebuilt query string.
    // If nothing at all can be parsed out of $in, it is returned unchanged.
    $vars=ExplodeQueryString($in);
    if (empty($vars)) return $in;
    $out=array();
    foreach ($vars as $k=>$v) {
        // Compare against the empty string rather than using empty(): "0" is a real value
        // (e.g. "subpage=0") and must survive the cleanup.
        if ((string)$k==='' || (string)$v==='') {
            continue;
        }
        $out[$k]=$v;
    }
    return ImplodeQueryString($out);
}

function footer() {
  // Prints the copyright/footer line, closes the HTML document and ENDS THE SCRIPT.
  // Nothing placed after a call to footer() will ever run.
  echo "<P align=\"center\"><font size=\"-1\"><I>(c)2005, <a href=\"mailto:phee@isthisthingon.org\">Brett Baugh</a> " .
      "- but it's open source.</I> (<a href=\"ex.html\" target=\"_blank\">Brief Description</a> | " .
      "<a href=\"unisearch-1.1.tgz\">Source Code Distribution</a>)</font></P>\n" .
      "</body></html>\n";
  die;
}

function show_blocks($start, $end, $pf=FALSE, $url=FALSE) {
  // Builds the little "key" table shown above a 256-char chart: one color-coded cell per
  // character block that overlaps the range $start..$end (both hex strings), three per row.
  // Every block listed is also appended to the global $pageblocks as array(start, end).
  //   $pf  - truthy for the printer-friendly form: a comma-separated text list instead of cells.
  //   $url - optional URL to put in front of each block's "#name" anchor link.
  // Returns the markup, or FALSE when no block overlaps the range.
  global $pageblocks;
  $printerFriendly=($pf ? TRUE : FALSE);
  $allblocks=loadblocks();
  $rangeStart=hexdec($start);
  $rangeEnd=hexdec($end);

  // Keep only the blocks touching the range, keyed by their start so duplicates collapse.
  $overlapping=array();
  foreach ($allblocks as $b) {
    if ($b[1]<$rangeStart || $b[0]>$rangeEnd) continue;
    $overlapping[$b[0]]=$b;
  }
  asort($overlapping);
  if (!$overlapping) return FALSE;

  $body='';
  $count=0;
  foreach ($overlapping as $block) {
    $pageblocks[]=array($block[0], $block[1]);
    $hs=strtoupper(hexpad(dechex($block[0])));
    $he=strtoupper(hexpad(dechex($block[1])));
    if ($printerFriendly) {
      $body.="{$block[2]}&nbsp;(0x$hs-0x$he), ";
      continue;
    }
    $color=bgcolor($block[0]);
    $block_url=($url ? "$url#{$block[2]}" : "#{$block[2]}");
    $body.="<td bgcolor=\"$color\"><B><a href=\"$block_url\">{$block[2]}</a></B>&nbsp;(0x$hs-0x$he)</td>";
    // Start a new table row after every third cell.
    if (++$count%3==0) $body.="</tr>\n<tr>";
  }

  $out="<table border=1 cellpadding=3 cellspacing=0>\n<tr>" . $body;
  if ($printerFriendly) {
    // Chop the trailing ", " off the list.
    return substr($out, 0, -2);
  }
  if ($count%3 && $count>3) {
    // Fill the rest of an incomplete final row with one grey spacer cell.
    $cs=3-($count%3);
    $out.="<td bgcolor=\"#999999\" colspan=\"$cs\">&nbsp;</td>";
  }
  return $out . "</tr>\n</table>\n";
}

function block_menu() {
  // Builds the <select> used to jump to a character block. Reads the block list from the
  // global $allblocks (entries of the form array(start, end, name)). Each option's value is
  // the first three digits of the block's 5-digit hex start.
  global $allblocks;
  $options=array("<option value=\"\">--Jump To Character Block--</option>\n");
  foreach ($allblocks as $block) {
    $starthex=strtoupper(hexpad(dechex($block[0])));
    $endhex=strtoupper(hexpad(dechex($block[1])));
    $jump=substr($starthex, 0, 3);
    $options[]="<option value=\"$jump\">0x$starthex-0x$endhex: {$block[2]}</option>\n";
  }
  return "<select name=\"codeblock\" onChange=\"submit();\">\n" . implode('', $options) . "</select>\n";
}

function split_pagenum($in) {
  // Splits a page number into its two hex digits:
  // "3" OR "03" gives array("0", "3"); "1F" gives array("1", "F"); etc.
  // Anything that is not one or two characters long after hexpad() gives array("0", "0").
  $padded=hexpad((string)$in, 2);
  $digits=array("0", "0");
  if (preg_match('/^(.)(.)$/', $padded, $m)) {
    $digits=array((string)$m[1], (string)$m[2]);
  } elseif (preg_match('/^(.)$/', $padded, $m)) {
    $digits=array("0", (string)$m[1]);
  }
  return $digits;
}

function show_upag() {
  // Echoes the top row of the pagination selector table: sixteen links (0-F) that change
  // the FIRST hex digit of the global $page while keeping its second digit. The currently
  // selected digit gets the "pagsel" CSS class, the others "pag".
  global $page, $subpage, $PHP_SELF;
  list($upage, $rpage)=split_pagenum($page);
  $html="<tr>\n<th align=right class=\"range\">Top-level:&nbsp;</th>\n<td>\n" .
    "<table border=\"0\" cellpadding=\"3\" cellspacing=\"0\">\n<tr>\n";
  foreach (str_split('0123456789ABCDEF') as $hex) {
    $c=($upage==$hex) ? "pagsel" : "pag";
    $html.="<th valign=\"bottom\" width=\"20\" class=\"$c\">" .
      "<a class=\"$c\" href=\"$PHP_SELF?page=$hex$rpage\">$hex</a></th>\n";
  }
  $html.="<td align=\"center\" class=\"range\"><I>Page/Subpage:</I></td>\n</tr>\n</table>\n</td></tr>\n";
  echo $html;
  return;
}

function show_pag() {
  // Echoes the middle row of the pagination selector table: sixteen links (0-F) that change
  // the SECOND hex digit of the global $page, followed by the codepoint range the page
  // covers ($page . "000" through $page . "FFF", in hex and decimal).
  global $page, $subpage, $PHP_SELF;
  list($upage, $rpage)=split_pagenum($page);
  $haveDigit=(trim($rpage)!="");

  $html="<tr>\n<th align=right class=\"range\">Next-level:&nbsp;</th>\n<td>\n" .
    "<table border=0 cellpadding=3 cellspacing=0>\n<tr>\n";
  foreach (str_split('0123456789ABCDEF') as $hex) {
    $c=($haveDigit && $rpage==$hex) ? "pagsel" : "pag";
    $html.="<th valign=\"bottom\" width=\"20\" class=\"$c\">" .
      "<a class=\"$c\" href=\"$PHP_SELF?page=$upage$hex\">$hex</a></th>\n";
  }

  $html.="<th align=left class=\"range\">";
  if ($haveDigit) {
    $from=$page."000";
    $to=$page."FFF";
    $html.="$from - $to (" . number_format(hexdec($from)) . " - " . number_format(hexdec($to)) . ")";
  } else {
    $html.="&nbsp;";
  }
  $html.="</th></tr>\n</table>\n</td></tr>\n";
  echo $html;
  return;
}

function show_sub() {
  // Echoes the bottom row of the pagination selector table: sixteen links (0-F) that pick
  // the subpage within the global $page, then the codepoint range of the selected subpage
  // (if any), then the previous/next row produced by show_prevnext().
  global $page, $subpage, $PHP_SELF;
  $html="<tr>\n<th align=\"right\" class=\"range\">Last-level:&nbsp;</th>\n<td>\n" .
    "<table width=\"100%\" border=\"0\" cellpadding=\"3\" cellspacing=\"0\">\n<tr>\n";
  foreach (str_split('0123456789ABCDEF') as $hex) {
    $c=(trim($subpage)!="" && $subpage==$hex) ? "pagsel" : "pag";
    $html.="<th valign=\"bottom\" width=\"20\" class=\"$c\">" .
      "<a class=\"$c\" href=\"$PHP_SELF?page=$page&subpage=$hex\">$hex</a></th>\n";
  }

  $html.="<th align=\"left\" class=\"range\">";
  if (trim($subpage)!="") {
    $from=$page.$subpage."00";
    $to=$page.$subpage."FF";
    $html.="$from - $to (" . number_format(hexdec($from)) . " - " . number_format(hexdec($to)) . ")";
  } else {
    $html.="&nbsp;";
  }
  $html.="</th></tr>\n</table>\n</td></tr>\n";
  echo $html;
  echo show_prevnext();
  return;
}

function show_prevnext() {
  // Returns the "Block Nav" row with links to the previous and next 256-char chart, for use
  // under the other 'pag' rows. Returns '' unless the global $subpage is set.
  // Stepping past subpage F rolls over into the next page (and below 0 into the previous
  // one); no link is produced before page 00 / subpage 0 or after page FF / subpage F.
  // The returned markup ends at the inner </table>; the enclosing <td>/<tr> are not closed here.
  global $page, $subpage, $PHP_SELF;
  if (!isset($subpage)) return '';

  $prevP=$nextP=hexdec($page);
  $dsub=hexdec($subpage);
  $prevS=$dsub-1;
  $nextS=$dsub+1;
  if ($prevS < 0) {
    $prevS=15;
    --$prevP;
  }
  if ($prevP < 0) $prevP=FALSE;
  if ($nextS == 16) {
    $nextS=0;
    ++$nextP;
  }
  if ($nextP > 255) $nextP=FALSE;

  // Back to upper-case hex: pages are two digits wide, subpages one.
  if ($prevP!==FALSE) $prevP=hexpad(strtoupper(dechex($prevP)), 2);
  if ($nextP!==FALSE) $nextP=hexpad(strtoupper(dechex($nextP)), 2);
  $prevS=strtoupper(dechex($prevS));
  $nextS=strtoupper(dechex($nextS));

  $links=array();
  if ($prevP!==FALSE) {
    $links[]="<a class=\"pag\" href=\"$PHP_SELF?page=$prevP&subpage=$prevS\">Previous&nbsp;(0x$prevP$prevS"."00)</a>";
  }
  if ($nextP!==FALSE) {
    $links[]="<a class=\"pag\" href=\"$PHP_SELF?page=$nextP&subpage=$nextS\">Next&nbsp;(0x$nextP$nextS"."00)</a>";
  }

  return "<tr>\n<th nowrap align=\"right\" class=\"range\">Block Nav:&nbsp;</th>\n<td>\n" .
    "<table width=\"448\" border=\"0\" cellpadding=\"3\" cellspacing=\"0\">\n<tr>\n" .
    '<th nowrap class="pag" align="left" valign="top">' .
    implode(' | ', $links) .
    "</th>\n</tr>\n</table>\n";
}

function bgcolor($val) {
  // Returns the background color for codepoint $val on a chart page. The first entry of the
  // global $pageblocks (array(start, end) pairs) whose range contains $val decides the color:
  // the entry's index cycles through a three-color palette. Codepoints outside every listed
  // block get grey.
  global $pageblocks;
  $palette=array("#e0ffff", "#ffe0ff", "#ffffe0");
  foreach ($pageblocks as $index=>$range) {
    if ($range[0] <= $val && $range[1] >= $val) {
      return $palette[$index % count($palette)];
    }
  }
  // $val isn't in ANY block.
  return "#dddddd";
}

function block_name($val) {
  // Returns the name of the first block in the global $allblocks (entries of the form
  // array(start, end, name)) whose range contains the codepoint $val, or "" if $val isn't
  // in ANY block.
  global $allblocks;
  foreach ($allblocks as $block) {
    if ($block[0] <= $val && $block[1] >= $val) {
      return $block[2];
    }
  }
  return "";
}

function show_pf_chart() {
  // Echoes the printer-friendly 256-char chart for the current global $page/$subpage:
  // a 16x16 table whose cells show the hex codepoint, the UTF-8 hex bytes, the HTML entity
  // text and the glyph itself. Combining glyphs are rendered through CombinedChar().
  global $page, $subpage;
  $combiners=CombiningGlyphs();
  $start="$page$subpage";
  $out = "<table cellpadding=\"1\" cellspacing=\"0\" class=\"PFtable\">\n<tr>\n";
  for ($i=0; $i<256; $i++) {
    $hex=$start . hexpad(dechex($i), 2);
    $uni=hexdec($hex);
    $hex2=hexpad(dechex($uni));
    $u8h=Codepoint2Utf8Hex(array($uni));
    // $combiners only has entries for combining glyphs, so test with empty() instead of
    // reading a key that is missing for almost every codepoint.
    if (!empty($combiners[$uni])) {
        $comb='<span class="PFcombiner">' . CombinedChar($hex) . '</span>';
    } else {
        $comb='<span class="PFglyph">' . "&#$uni;" . '</span>';
    }

    $out .= "<td class=\"PFcode\">" .
      '<span class="PFinfo">' .
      "$hex2<BR>" .
      "$u8h<BR>" .
      "&amp;#$uni;" .
      "<BR></span>" .
      "$comb</td>\n";

    // Sixteen cells per row.
    if ($i%16==15) {
      $out .= "</tr>\n<tr>\n";
    }
  }
  $out .= "</tr>\n</table>\n";
  echo $out;
}

function show_chart() {
  // Echoes the big 256-char chart for the current global $page/$subpage: first the block
  // key from show_blocks(), then a 16x16 table. Each cell shows the hex codepoint (linked to
  // the glyph's detail view), the UTF-8 hex bytes, the Shift-JIS code from the global
  // $allcodes (if any), the HTML entity text and the glyph itself. The cell whose codepoint
  // matches the global $hilite (a hex string) is highlighted.
  global $page, $subpage, $allcodes, $PHP_SELF, $hilite;

  $combiners=CombiningGlyphs();
  $start="$page$subpage";
  $blocks=show_blocks($start."00", $start."FF");
  if ($blocks) echo "<font size=-1>Character block(s) on this page:<BR>\n$blocks<P>\n";
  else echo "<font size=-1>This page contains no Unicode-defined character blocks.<P>\n";
  echo "<I>Click on a character to copy it to the " . CLIPBOARD_LINK . "Unisearch Clipboard</a>.</I><BR>\n" .
      "<I>Click a character's Hex Codepoint (top row) to see its details.</I><BR>\n" .
      '<a target="_blank" href="pfpopup.php?page='.$page.'&subpage='.$subpage.'">Printer-Friendly Version</a></font><BR>' .
      "<table border=\"1\" cellpadding=\"3\" cellspacing=\"0\">\n<tr>\n";

  // Parse the highlight target once instead of on every cell.
  $hiliteDec=(!empty($hilite) ? hexdec($hilite) : FALSE);
  $prev_bgc='';
  for ($i=0; $i<256; $i++) {
    $hex=$start . hexpad(dechex($i), 2);
    $uni=hexdec($hex);
    $hex2=hexpad(dechex($uni));
    $u8h=Codepoint2Utf8Hex(array($uni));
    $deturl="$PHP_SELF?page=$page&subpage=$subpage&glyph=$hex";

    // $allcodes has no entry for codepoints that are missing from the database.
    $jis=(isset($allcodes[$hex]) ? $allcodes[$hex] : '');
    if ($jis) {
      $jis=hexpad(dechex(hexdec($jis)), 4);
    } else {
      $jis="<I>None</I>";
    }

    // A change of background color means a new block starts here: drop a named anchor so
    // the block key's "#name" links have something to jump to.
    $bgc=bgcolor($uni);
    if ($bgc != $prev_bgc) {
      $blockname=block_name($uni);
      $anchor="\n<a name=\"$blockname\"></a>\n";
      $prev_bgc=$bgc;
    } else {
      $anchor='';
    }
    if ($hiliteDec!==FALSE && $hiliteDec==$uni) {
      $bgc="#ffff66";
    }

    // $combiners only has entries for combining glyphs.
    if (!empty($combiners[$uni])) {
        $comb='<font color="#CC0000">' . CombinedChar($hex) . '</font>';
    } else {
        $comb="&#$uni;";
    }
    echo "<td nowrap valign=\"top\" align=\"center\" class=\"code\" bgcolor=\"$bgc\">$anchor" .
      '<font style="font-size: 8pt">' .
      "<a class=\"plain\" href=\"$deturl\">" .
      "$hex2</a><BR>" .
      "$u8h<BR>" .
      "$jis<BR>" .
      "&amp;#$uni;" .
      "<BR>" .
      "<BR></font>" .
      "<div onMouseOver=\"myHint.show($uni);\" onMouseOut=\"myHint.hide();\">" .
      "<a class=\"glyph\" href=\"Javascript: PopIt($uni)\">$comb</a></div>" .
      "</td>\n";
    // Sixteen cells per row.
    if ($i%16==15) {
      echo "</tr>\n<tr>\n";
    }
  }
  echo "</tr>\n</table>\n";
  return;
}

function show_row($sjis, $descr, $pron, $def, $hexcode) {
  // Utility function for show_glyph(): returns the HTML detail panel for ONE character.
  //   $sjis    - Shift-JIS code as a hex string, or "" for none
  //   $descr   - character description, or "" for none
  //   $pron    - pronunciation text (row omitted when empty)
  //   $def     - definition text (row omitted when empty)
  //   $hexcode - the character's codepoint as a hex string
  // The panel holds the large glyph, its codes, the block key from show_blocks() (linking
  // back to the chart page with this glyph highlighted) and the definition/pronunciation.
    global $PHP_SELF;
    $combiners=CombiningGlyphs();
    $out="";
    $idx=hexpad($hexcode);
    // In the 5-digit codepoint the first two digits are the page and the third the subpage.
    $pagetmp=substr($idx, 0, -3);
    $subtmp=substr($idx, -3, 1);
    $url="$PHP_SELF?page=$pagetmp&subpage=$subtmp&hilite=$idx";
    $dec=hexdec($idx);
    // $combiners only has entries for combining glyphs.
    if (!empty($combiners[$dec])) {
        $comb='<font color="#CC0000">' . CombinedChar($idx) . '</font>';
    } else {
        $comb="&#$dec;";
    }
    $u8h=Codepoint2Utf8Hex(array($dec));
    if ($descr=="") $descr="<I>No Description</I>";
    if ($sjis=="") $sjis="<I>None</I>";
    else $sjis=hexpad(dechex(hexdec($sjis)), 4);
    $block=show_blocks($idx, $idx, FALSE, $url);
    // Five rows; the glyph cell spans all of them. (Each row closes itself, so the table
    // end is just </table>.)
    $out .= "<table border=\"1\" cellpadding=\"3\" cellspacing=\"0\">\n" .
      "<tr><td rowspan=\"5\" align=\"center\" valign=\"middle\" bgcolor=\"#ffffff\" width=\"80\">" .
      "<a class=\"glyph\" href=\"Javascript: PopIt($dec)\">" .
      "<font style=\"font-size: 48pt\">$comb</font></a></td>\n" .
      "<td><a class=\"plain\" href=\"Javascript: PopIt($dec)\">$descr</a></td></tr>\n" .
      "<tr><td>Unicode (Hex): <a class=\"plain\" href=\"$url\"><B>$idx</B></a></td></tr>\n" .
      "<tr><td>UTF-8 (Hex): <B>$u8h</B></td></tr>\n" .
      "<tr><td>Shift-JIS (Hex): <B>$sjis</B></td></tr>\n" .
      "<tr><td>Unicode (HTML): <B>&amp;#$dec;</B></td></tr>\n" .
      "</table>\n";
    if ($block) $out .= $block;
    $RS='<tr><th nowrap align="right" valign="top">';
    $RM='</th><td align="left" valign="top">';
    $RE="</td></tr>\n";
    $out .= "<table border=\"0\" cellpadding=\"3\" cellspacing=\"0\">\n";
    if ($def) $out .= $RS . "Definition:$RM$def$RE";
    if ($pron) $out .= $RS . "Pronunciation(s):$RM$pron$RE";
    $out .= "</table>\n<hr noshade size=1>\n";
    return $out;
}

function show_glyph(&$idxarr) {
  // Echoes the detail panels for one character or a list of characters, fetching all of
  // them from the database with a single query.
  //   $idxarr - a hex codepoint string, or an array of them ("0A" for an ascii 10, "63A2"
  //             for the character that is 0xE68EA2 in UTF8, etc). Passed BY REFERENCE: a
  //             lone string is turned into a one-element array, and entries that are empty
  //             or evaluate to less than 1 are removed from it.
  // Nothing is shown (only a message) when there are more than $maxresults entries or
  // when no usable entry is left.

  global $desctext, $deftext, $prontext, $maxresults;
  if (!is_array($idxarr)) $idxarr=array($idxarr);
  if (count($idxarr) > $maxresults) {
    echo "Too many results found (over " . number_format($maxresults) .
        "); please refine the search parameters and try again...<BR>\n";
    return;
  }

  $thestring='';
  $decs=array();
  foreach ($idxarr as $k=>$idx) {
    $idx=HexOnly($idx);
    $dec=hexdec($idx);
    if ($idx=='' || $dec < 1) { // Make sure this one doesn't get shown...
        unset($idxarr[$k]);
        continue;
    }
    $decs[]=$dec;
    $thestring .= Codepoint2Utf8(array($dec));
  }
  if (!$decs) {
    echo "No results found (illegal input perhaps?).<BR>\n";
    return;
  }
  // Always select by the decimal codepoint.
  $where="deccode in (" . implode(',', $decs) . ")";
  $mpop=implode(':', $decs);
  $sq="select * from codepoint where $where";
  $res=dosql($sq);

  // The characters themselves go into the page as text, so they must be HTML-escaped
  // (think of "<" or "&" being looked up).
  // NOTE(review): assumes Codepoint2Utf8() returns raw UTF-8 text, not markup - confirm.
  $shown=htmlspecialchars($thestring, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
  $out="<hr noshade size=1>\nClick on a character (or its name) to copy it to the " .
    CLIPBOARD_LINK . "Unisearch Clipboard</a>.<BR>\n" .
    "<a class=\"plain\" href=\"Javascript: PopMulti('$mpop')\">Click to copy " .
    ($desctext || $deftext || $prontext ?
      "all these characters"  :  "\"<font style=\"font-size: 14pt\">$shown</font>\""
    ) . " to the clipboard.</a><BR>\n" .
    "<hr noshade size=1>\n";

  if ($res && mysql_num_rows($res) > 0) {
    // Index the rows by hex codepoint, keeping only the first row seen for each.
    $byHex=array();
    while ($raw=mysql_fetch_assoc($res)) {
      $row=ass($raw);
      $h=$row['hexcode'];
      if ($h && !isset($byHex[$h])) $byHex[$h]=$row;
    }
    // MySql reorders the results by deccode, the PK, and can't be told not to. So the
    // panels are emitted in the order of the $idxarr input instead.
    $blank=array('sjis'=>'', 'descr'=>'', 'pron'=>'', 'def'=>'', 'hexcode'=>'');
    foreach ($idxarr as $h) {
      if (isset($byHex[$h]) && is_array($byHex[$h])) {
        $r=$byHex[$h] + $blank;
      } else {
        $r=array('hexcode'=>$h, 'descr'=>'<I>Not In Unicode Database</I>') + $blank;
      }
      $out .= show_row($r['sjis'], $r['descr'], $r['pron'], $r['def'], $r['hexcode']);
    }
  } else {
    // No rows came back at all: show a bare panel for the first remaining entry.
    $first=reset($idxarr);
    if ($first) $out .= show_row("", "", "", "", $first);
  }
  echo $out;
  return;
}

function HexOnly($in) {
  // Reduces $in to its hex digits: surrounding whitespace is trimmed, every "0x" (in any
  // letter case) is removed, and then every character that is not 0-9 or a-f/A-F is dropped.
  $withoutPrefix=preg_replace('/0x/i', '', trim($in));
  return preg_replace('/[^a-f\d]/i', '', $withoutPrefix);
}

function is_hex($in) {
  // Tells you whether $in consists of nothing but hex digits (at least one).
  // Returns preg_match()'s result: 1 if it does, 0 if it doesn't.
  // The pattern ends in \z rather than $, because $ would also accept a trailing newline
  // ("1F\n").
  return (preg_match('/^[a-f\d]+\z/i', $in));
}

function hexpad($in, $chars=5, $reverse=FALSE) {
  // Upper-cases and trims $in, strips its leading zeros, and then pads it with "0" on the
  // left until it is at least $chars characters long. Longer input is NOT shortened.
  //   $chars   - minimum length; a false value means "just upper-case and trim, no padding"
  //   $reverse - pad on the RIGHT instead of the left
  // Missing input (NULL) and non-scalar input are treated as an empty string, so
  // hexpad(NULL, 2) is "00" - callers rely on that for unset request variables.
  $in=strtoupper(trim(is_scalar($in) ? (string)$in : ''));
  if ($chars==FALSE) return $in;
  // Leading zeros go first so that over-padded input ("0003F") comes out at the requested width.
  $in=preg_replace('/^0+/', '', $in);
  return str_pad($in, $chars, "0", $reverse ? STR_PAD_RIGHT : STR_PAD_LEFT);
}

function ss($in) {
  // Strips backslash escapes from $in and HTML-escapes the result for output.
  // The flags and charset are spelled out because htmlspecialchars()'s defaults changed in
  // PHP 8.1 (single quotes are escaped and invalid byte sequences substituted since then);
  // pinning them makes the output the same on every PHP version this file runs under.
  return htmlspecialchars(stripslashes((string)$in), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
}

function ass($array) {
    // "Array stripslashes": returns $array with stripslashes() applied to every value,
    // descending into nested arrays; keys are left alone. A non-array argument is simply
    // run through stripslashes() itself and returned as a string.
    //
    // Callers feed this the result of a row fetch, which is NULL/FALSE once the rows run
    // out; the (string) cast turns that into '' (still false-y, so their loops end) without
    // handing a non-string to stripslashes().
    if (!is_array($array)) return stripslashes((string)$array);
    foreach ($array as $k=>$v) {
        $array[$k]=ass($v);
    }
    return $array;
}

function ArraySupersort(&$in, $sortby, $reverse=FALSE) {
    // Sorts the array of rows $in IN PLACE by each row's $sortby element, using a
    // case-insensitive "natural order" comparison. $reverse flips the order. The rows end up
    // re-indexed from 0.
    $sortKeys=array();
    foreach ($in as $k=>$row) {
        $sortKeys[$k]=$row[$sortby];
    }
    natcasesort($sortKeys); // This can change to meet your needs... asort(), uksort(), whateversort().
    if ($reverse) $sortKeys=array_reverse($sortKeys, TRUE);

    $sorted=array();
    foreach ($sortKeys as $k=>$unused) {
        $sorted[]=$in[$k];
    }
    $in=$sorted;
}

function GMT() {
    // Returns the current Unix timestamp, microseconds included, as a float.
    // microtime(TRUE) hands it over as a float directly, so there is no need to take the
    // "msec sec" string form apart and add the pieces.
    return microtime(TRUE);
}

function SQuote($in) {
    // Prepares a value for use inside a quoted SQL string: backslash escapes are stripped
    // with ass() and the result is passed through mysql_real_escape_string(). Arrays are
    // handled element by element (recursively), keeping their keys.
    if (is_array($in)) {
        foreach ($in as $key=>$value) {
            $in[$key]=SQuote($value);
        }
        return $in;
    }
    return mysql_real_escape_string(ass($in));
}

function jsquote($in) {
/*
 * Cleans up a string meant to be put into javascript as a variable's setting delimited by ' marks.
 * JS doesn't like \n in the middle of a definition, so this also breaks it up by replacing every
 * CR/LF with " ' +\n  '", which even indents the code nicely. Like, this:

HINTS_ITEMS[20496]='<I>Cantonese:</I> Suk-1<BR>
<I>Japanese On:</I> Shuku<BR>
<BR><I>Hastily; suddenly</I>';

 * ...becomes this:

HINTS_ITEMS[20496]='<I>Cantonese:</I> Suk-1<BR> ' +
  '<I>Japanese On:</I> Shuku<BR> ' +
  '<BR><I>Hastily; suddenly</I>';

 * See how much nicer that looks? :) Just don't replace ' with \' AFTER calling this or you'll suffer THE PAIN!!
 *
 * Quotes that are ALREADY escaped (\') are un-escaped first, so that every quote ends up
 * with exactly one backslash in front of it no matter how it came in.
 */

  // Step 1: \' becomes '  (plain string replacement - the old regex for this step matched
  // a bare quote instead of backslash-quote, so escaped quotes got escaped a second time).
  $out=str_replace("\\'", "'", $in);
  // Step 2: ' becomes \'
  $out=str_replace("'", "\\'", $out);
  // Step 3: every run of CR/LF closes the literal, adds " +" and reopens it on the next line.
  return preg_replace('/[\r\n]+/', " ' +\n  '", $out);
}

function qs($in) {
    // Makes a value safe to put between the double quotes of a <form> field's value=""
    // attribute by turning every " into &quot;. Nothing else is escaped.
    return str_replace('"', '&quot;', $in);
}

function progressbar($current, $total) {
  // Emits one step of a "0%-10%-20%-...100%" text progress bar. Used in the parser only.
  // Call it once per item with the item's 1-based number: the first call prints "0%-", the
  // last one "100%", and in between a "NN%-" marker is printed on the first item that
  // reaches each multiple of ten percent. Output is flushed after every step.
  // Does nothing at all when $current is past $total.
  if ($current>$total) return;

  if ($total==1) {
    $marker="100%";
  } elseif ($current==1) {
    $marker="0%-";
  } elseif ($current==$total) {
    $marker="100%";
  } else {
    $percent=floor(($current/$total)*100);
    // Only the first item to arrive at a given multiple of ten gets to print it.
    $firstAtThisMark=($percent % 10 == 0 && (($current-1)/$total*100) < $percent);
    $marker=($firstAtThisMark ? "$percent%-" : '');
  }
  echo $marker;
  flush();
}

function loadblocks($nameorder=TRUE, $usehex=FALSE) {
  // Loads ALL character blocks from the "block" table and returns them as
  //   array(n => array(0=>block_start, 1=>block_end, 2=>block_name_string))
  // If $nameorder is FALSE the list is ordered by block start - otherwise, by block name.
  // If $usehex isn't FALSE, start/end are selected with hex() instead of as plain numbers.
  // Only the result of a default-parameter call is cached (in a static) and reused; any
  // other combination queries the database each time and leaves the cache alone.
  static $cached;
  $isDefaultCall=!($nameorder==FALSE || $usehex);
  if ($isDefaultCall && isset($cached)) return $cached;

  $sq=($usehex ? "select hex(start), hex(end), name from block" : "select start, end, name from block");
  $sq.=($nameorder ? " order by name" : " order by start");
  $res=dosql($sq);
  $blocks=array();
  while ($row=ass(mysql_fetch_row($res))) {
    $blocks[]=array($row[0], $row[1], $row[2]);
  }

  if ($isDefaultCall) $cached=$blocks;
  return $blocks;
}

function loadpage($start) {
  // Fills the global lookup arrays for the 256 characters starting at $start (in DECIMAL).
  // Each array is keyed by the character's hex codepoint as stored in the database:
  //   $allcodes - SJIS code, if any        $alldescs - character description
  //   $allprons - pronunciation, if any    $alldefs  - English definition, if any
  // Always returns TRUE.
  global $alldescs, $allprons, $alldefs, $allcodes;
  $end=$start+255; // We always show exactly 256 characters per chart page. ALWAYS. Even if none exist.
  $res=dosql("select hexcode, sjis, descr, pron, def from codepoint where deccode >= $start and deccode <= $end order by deccode");
  while ($row=ass(mysql_fetch_row($res))) {
    list($cp, $sjis, $descr, $pron, $def)=$row;
    $allcodes[$cp]=$sjis;
    $alldescs[$cp]=$descr;
    $allprons[$cp]=$pron;
    $alldefs[$cp]=$def;
  }
  return TRUE;
}

function sjis_search($shiftjis) {
  // Looks up the character whose Shift-JIS code is $shiftjis (a hex string; it is
  // upper-cased and run through hexpad() before the lookup).
  // Returns the hex codepoint from the first matching row, or FALSE if nothing matches.
  $needle=SQuote(hexpad(strtoupper($shiftjis)));
  $res=dosql("select hexcode from codepoint where sjis like '$needle'");
  if (mysql_num_rows($res) > 0) {
    return mysql_result($res, 0, 0);
  }
  return FALSE;
}

function descr_search($descr) {
  // Echoes a glyph table for all characters with $descr in their descriptions, with the
  // result count above and below it. Runs of whitespace in $descr act as wildcards.
  // Defines the constant FIRST_RESULT as the first matching hex codepoint (NULL if there
  // are no matches), unless it is already defined. Returns the number of results.
  $descr=SQuote(preg_replace('/\s+/', '%', ass(trim($descr))));
  $sq="select hexcode from codepoint where descr like '%$descr%'";
  $res=dosql($sq);
  $num=($res ? mysql_num_rows($res) : 0); // dosql() returns FALSE for a failed query
  $total="<B>" . number_format($num) . " total result".($num==1?'':'s').".</B>";

  $getcodes=array();
  if ($num > 0) {
    echo "<P>$total<BR>\n"; flush();
    while ($row=mysql_fetch_row($res)) {
      $getcodes[]=$row[0];
    }
    show_glyph($getcodes);
  }
  echo "$total\n";
  // A constant can only be defined once, and there may be no first result at all.
  if (!defined('FIRST_RESULT')) {
    define('FIRST_RESULT', isset($getcodes[0]) ? $getcodes[0] : NULL);
  }
  return $num;
}

function def_search($def) {
  // Echoes a glyph table for all characters with $def in their definitions, with the
  // result count above and below it. Runs of whitespace in $def act as wildcards.
  // Defines the constant FIRST_RESULT as the first matching hex codepoint (NULL if there
  // are no matches), unless it is already defined. Returns the number of results.
  $def=SQuote(preg_replace('/\s+/', '%', ass(trim($def))));
  $sq="select hexcode from codepoint where def like '%$def%'";
  $res=dosql($sq);
  $num=($res ? mysql_num_rows($res) : 0); // dosql() returns FALSE for a failed query
  $total="<B>" . number_format($num) . " total result".($num==1?'':'s').".</B>";

  $getcodes=array();
  if ($num > 0) {
    echo "<P>$total<BR>\n"; flush();
    while ($row=mysql_fetch_row($res)) {
      $getcodes[]=$row[0];
    }
    show_glyph($getcodes);
  }
  echo "$total\n";
  // A constant can only be defined once, and there may be no first result at all.
  if (!defined('FIRST_RESULT')) {
    define('FIRST_RESULT', isset($getcodes[0]) ? $getcodes[0] : NULL);
  }
  return $num;
}

function pron_search($pron) {
  // Echoes a glyph table for all characters with $pron in their pronunciations (exact
  // matches only; TRICKY!). The SQL "like" only pre-selects candidates; each candidate's
  // pronunciation text must then also match $pron as a stand-alone term (delimited by
  // non-word characters) to count. Runs of whitespace in $pron act as wildcards.
  // Output buffering is required so we can put the number of results at the top too.
  // Defines the constant FIRST_RESULT as the first matching hex codepoint (NULL if there
  // are no matches), unless it is already defined. Returns the number of results.
  $dq=preg_replace('/\s+/', '.*', preg_quote($pron, "/")); // treat spaces as wildcards
  $pron=SQuote(preg_replace('/\s+/', '%', ass(trim($pron)))); // ditto
  $sq="select hexcode, pron from codepoint where pron like '%$pron%'";
  $res=dosql($sq);
  ob_start();
  $num=0;
  $getcodes=array();
  if ($res && mysql_num_rows($res) > 0) { // dosql() returns FALSE for a failed query
    while ($raw=mysql_fetch_row($res)) {
      $row=ass($raw);
      $p=$row[1];
      // The padding spaces let a term at the very start or end of the text match too.
      if (!preg_match('/\W+?'.$dq.'\W+?/i', " $p ")) { continue; }
      $getcodes[]=$row[0];
      ++$num;
    }
    show_glyph($getcodes);
  }
  $tmp=ob_get_clean();
  $total="<B>" . number_format($num) . " total result".($num==1?'':'s').".</B>";
  echo "<P>$total<BR>\n$tmp$total\n";
  // A constant can only be defined once, and there may be no first result at all.
  if (!defined('FIRST_RESULT')) {
    define('FIRST_RESULT', isset($getcodes[0]) ? $getcodes[0] : NULL);
  }
  return $num;
}

function IsSearchPage() {
    // Tells you whether the current page being viewed contains (or should contain) search
    // results: TRUE if any of the request variables named in $SearchVars has a non-empty
    // value in $G, FALSE otherwise.
    global $G, $SearchVars;
    foreach ($SearchVars as $v) {
        // Most search vars are absent on any given request, so don't read the key blindly.
        if (!empty($G[$v])) {
            return TRUE;
        }
    }
    return FALSE;
}

function LogQuery($field, $string) {
    // Logs a search made through the form fields on the pages: inserts a row holding the
    // field name, the search string, the client address and the current time into the
    // "searches" table. Always returns TRUE.
    // Ain't takin' no chances with THIS one. This is the only place in the UI any sql inserts
    // happen, so EVERY value that goes into the statement is escaped - the client address
    // included (it is also absent from $_SERVER when running from the command line).
    $string=SQuote(trim(ass($string)));
    $addr=SQuote(isset($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : '');
    $sq="insert into searches values (0, '".SQuote($field)."', '$string', '$addr', " . time() . ')';
    dosql($sq);
    return TRUE;
}

function ExtractGlobals($types="gp") {
    // Fills the global $G with the merged contents of the request arrays picked by $types.
    // $types is a string of letters (any case): g=$_GET, p=$_POST, c=$_COOKIE, s=$_SESSION,
    // e=$_ENV, f=$_FILES. The arrays are always merged in that order - whatever the order
    // of the letters - and later ones win when the same key appears more than once.
    global $G;
    $types=strtolower($types);

    $sources=array();
    if (strpos($types, "g")!==FALSE && isset($_GET))     $sources[]=$_GET;
    if (strpos($types, "p")!==FALSE && isset($_POST))    $sources[]=$_POST;
    if (strpos($types, "c")!==FALSE && isset($_COOKIE))  $sources[]=$_COOKIE;
    // $_SESSION only exists once a session has been started.
    if (strpos($types, "s")!==FALSE && isset($_SESSION)) $sources[]=$_SESSION;
    if (strpos($types, "e")!==FALSE && isset($_ENV))     $sources[]=$_ENV;
    if (strpos($types, "f")!==FALSE && isset($_FILES))   $sources[]=$_FILES;

    $out=array();
    foreach ($sources as $source) {
        // array_merge() refuses anything but arrays.
        if (is_array($source)) $out=array_merge($out, $source);
    }
    $G=$out;
}

function RecMkdir($dir, $mode=0644) {
    // Creates the directory $dir together with any missing parent directories.
    // Returns 0 on success (also when the directory already exists) and 1 on failure, like
    // the exit status of the "mkdir -p" shell command this used to run.
    //
    // PHP's own mkdir() has a recursive mode, so the path no longer passes through a shell,
    // where characters such as $, ` or " in it would have been interpreted.
    // $mode is accepted for compatibility but NOT applied (it never was): new directories
    // get the default permissions, subject to the umask.
    if (is_dir($dir)) return 0;
    if (mkdir($dir, 0777, TRUE)) return 0;
    // Someone else may have created it in the meantime.
    return (is_dir($dir) ? 0 : 1);
}

function GlyphInfo($glyph) {
    // Returns a character's entire DB row as an associative array ($glyph is a hex codepoint).
    // Returns array() when the codepoint is above 0xFFFFF or has no row in the database.
    // Rows (and misses) are remembered per codepoint for the rest of the request, so each
    // one is queried only once.
    static $cache;
    $key=hexpad($glyph);
    $dec=hexdec($key);
    if (!is_numeric($dec) || $dec < 0 || $dec > 0xFFFFF) return array();
    if (isset($cache[$key])) return $cache[$key];

    $info=array();
    // Always use the decimal version when selecting from the database. ALWAYS!
    $res=dosql("select * from codepoint where deccode=$dec");
    if ($res && mysql_num_rows($res)) {
        $info=ass(mysql_fetch_assoc($res));
    }
    $cache[$key]=$info;
    return $info;
}

function CombiningGlyphs() {
    // Returns an array of every 'combining-form' glyph's decimal codepoint => canonical_combining_class
    // (all rows of "codepoint" whose combiner column is not '0').
    // The original author counted 681 of them... non-trivial, but necessary - so the list is
    // fetched once and kept in a static for later calls.
    static $cache;
    if (isset($cache)) return $cache;

    $cache=array();
    $res=dosql("select deccode, combiner from codepoint where combiner != '0'");
    if (!$res || !mysql_num_rows($res)) return $cache;
    while ($row=mysql_fetch_row($res)) {
        $cache[$row[0]]=$row[1];
    }
    return $cache;
}

function CombinedChar($hex) {
    // Builds a UTF-8 string consisting of a base character followed by the combining
    // character $hex (a hex codepoint STRING). The base is picked from a hand-made
    // lookup table keyed by the first codepoint of the block $hex belongs to, because
    // no complete machine-readable list of "what combines with what" was available.
    // Returns whatever Codepoint2Utf8() returns for the pair.

    // block start (decimal) => base codepoint (decimal). Order matters only in that the
    // first matching entry wins.
    static $partners=array(
        768 => 959,
        1024 => 1086,
        1424 => 1488,
        1536 => 1607,
        1792 => 1816,
        2304 => 2336,
        2432 => 2466,
        2560 => 2600,
        2688 => 2743,
        2816 => 2848,
        2944 => 2986,
        3072 => 3120,
        3200 => 3248,
        3328 => 3360,
        3584 => 3607,
        3712 => 3751,
        3840 => 3926,
        4608 => 4899,
        4864 => 4928,
        8400 => 8420,
        12288 => 12353,
        12352 => 12353,
        64256 => 64294,
        65056 => 65056,

        // Blocks for which no font was available to check the result; they are simply
        // paired with a space (32). Update them if that ever becomes possible.
        1984 => 32,   // NKo (N'Ko, a West African script)
        3456 => 32,   // Sinhala
        4096 => 32,   // Myanmar
        5888 => 32,   // Tagalog
        5920 => 32,   // Hanunoo
        5952 => 32,   // Buhid
        5984 => 32,   // Tagbanwa
        6016 => 32,   // Khmer
        6144 => 32,   // Mongolian
        6400 => 32,   // Limbu
        6528 => 32,   // New Tai Lue
        6656 => 32,   // Buginese
        6912 => 32,   // Balinese
        7616 => 32,   // Combining Diacritical Marks Supplement
        43008 => 32,  // Syloti Nagri
        68096 => 32,  // Kharoshthi
        119040 => 32, // Musical Symbols
        119296 => 32, // Ancient Greek Musical Notation
    );

    $block=CodepointToBlock($hex); // array(blockstart, blockend, blockname)
    $start=$block[0];

    // Any block not in the table gets a small "x" (120). Probably won't combine, but...
    $base=120;
    foreach ($partners as $blockStart => $partner) {
        // Loose comparison on purpose: the block start is matched the same way a
        // switch statement would match it, whatever type it arrives as.
        if ($start == $blockStart) {
            $base=$partner;
            break;
        }
    }

    // Base first, combiner second, encoded together as one UTF-8 string.
    return Codepoint2Utf8(array($base, hexdec($hex)));
}

function CodepointToPages($hex) {
    // Splits a hex codepoint into the page and subpage it is listed under.
    // Returns array(page, subpage): the first two characters and the third character
    // of the padded hex string. Anything empty() considers empty yields array().
    if (!empty($hex)) {
        $padded=hexpad($hex);
        return array(substr($padded, 0, 2), substr($padded, 2, 1));
    }
    return array();
}

function CodepointToBlock($hex) {
    // Finds the block that the hex codepoint $hex falls into.
    // Returns that block's entry from loadblocks(): array(start_dec, end_dec, blockname).
    // When $hex is not valid hex, or no block contains it, returns array(-1, -1, '').
    // Answers are remembered per padded hex string.
    static $found=array();
    $none=array(-1, -1, '');
    if (!is_hex($hex)) {
        return $none;
    }
    $key=hexpad($hex);
    if (isset($found[$key])) {
        return $found[$key];
    }
    $dec=hexdec($key);
    $match=$none;
    // Local copy of the block list; the first block whose range contains $dec wins.
    foreach (loadblocks() as $block) {
        if ($block[0] <= $dec && $dec <= $block[1]) {
            $match=$block;
            break;
        }
    }
    $found[$key]=$match;
    return $match;
}

function MakeEndianString($str, $end='l') {
    // Treats $str as a run of hex digits and lays it out the way a hexdump of 16-bit
    // words would show it: groups of four digits separated by spaces, with the two
    // bytes of each group swapped for little-endian ('l', the default) or left in
    // place for anything else (big-endian).
    // Returns '' when $str contains no hex digits.
    $little=($end == 'l');
    $digits=HexOnly($str);
    $count=strlen($digits);
    if ($count < 1) {
        return '';
    }
    if ($count < 4) {
        // Fewer than four digits: pad up to one whole word.
        $digits=hexpad($digits, 4);
    }
    $words=array();
    $pos=0;
    while ($pos < $count) {
        $word=substr($digits, $pos, 4);
        if (strlen($word) < 4) {
            // A short final word is padded on the RIGHT instead of the left.
            $word=hexpad($word, 4, TRUE);
        }
        $hi=substr($word, 0, 2);
        $lo=substr($word, 2, 2);
        $words[]=$little ? $lo . $hi : $hi . $lo;
        $pos += 4;
    }
    return trim(implode(' ', $words));
}

function Utf8HexToUrl($hex) {
    // Turns, say, "EF8899" into "%EF%88%99", for more-portable insertion into URLs.
    //   $hex - a string of hex digits; it is run through HexOnly() first so unexpected
    //          characters don't end up in the output.
    // Returns one "%XX" group per byte. Fewer than three digits yield a single group.
    $hex=HexOnly($hex);
    $len=strlen($hex);
    if ($len < 3) {
        return ('%' . hexpad($hex, 2));
    }
    // Bytes are aligned from the right, so with an odd number of digits the first
    // digit is a byte of its own. Give it its leading zero now; otherwise it would be
    // silently dropped from the result.
    if ($len % 2) {
        $hex='0' . $hex;
    }
    $out='';
    foreach (str_split($hex, 2) as $byte) {
        $out .= '%' . hexpad($byte, 2);
    }
    return $out;
}

function Utf8HexToPhpString($hex) {
    // Turns, say, "EF8899" into "\xEF\x88\x99", to ease use of UTF-8 in scripting languages. Not just PHP. :)
    //   $hex - a string of hex digits; everything that is not 0-9/a-f/A-F is removed
    //          first so unexpected characters don't end up in the output.
    // Returns one "\xXX" group per byte. Fewer than three digits yield a single group.
    $hex=preg_replace('/[^a-f\d]/i', '', $hex);
    $len=strlen($hex);
    if ($len < 3) {
        return ('\x' . hexpad($hex, 2));
    }
    // Bytes are aligned from the right, so with an odd number of digits the first
    // digit is a byte of its own. Give it its leading zero now; otherwise it would be
    // silently dropped from the result.
    if ($len % 2) {
        $hex='0' . $hex;
    }
    $out='';
    foreach (str_split($hex, 2) as $byte) {
        $out .= '\x' . hexpad($byte, 2);
    }
    return $out;
}

function Utf16ToUtf8($str) {
    // Converts a UTF-16 byte string to UTF-8.
    //   $str - raw UTF-16 bytes. A leading BOM picks the byte order (FE FF = big-endian,
    //          FF FE = little-endian) and is removed; without a BOM the data is taken
    //          to be little-endian.
    // Returns the UTF-8 string ('' for empty input).
    //
    // A high surrogate followed by a low surrogate is combined into one codepoint and
    // written as a single 4-byte sequence. A surrogate without its partner is still
    // written as a 3-byte sequence, and a dangling final byte is treated as if a zero
    // byte followed it, so that input which was accepted before gives the same output.
    $str=(string)$str;
    $be=FALSE;
    $bom=(string)substr($str, 0, 2);
    if ($bom === "\xFE\xFF") {
        $be=TRUE;
        $str=(string)substr($str, 2);
    } elseif ($bom === "\xFF\xFE") {
        $str=(string)substr($str, 2);
    }
    if ($str === '') {
        return '';
    }
    if (strlen($str) % 2) {
        $str .= "\x00";
    }
    // 'n' = unsigned 16-bit big-endian, 'v' = unsigned 16-bit little-endian.
    $units=array_values(unpack($be ? 'n*' : 'v*', $str));
    $count=count($units);
    $utf8='';
    for ($i=0; $i<$count; $i++) {
        $c=$units[$i];
        if ($c >= 0xD800 && $c <= 0xDBFF && $i+1 < $count
            && $units[$i+1] >= 0xDC00 && $units[$i+1] <= 0xDFFF) {
            // Surrogate pair: rebuild the codepoint above U+FFFF and consume both units.
            $c=0x10000 + (($c - 0xD800) << 10) + ($units[$i+1] - 0xDC00);
            $i++;
        }
        if ($c >= 0x0001 && $c <= 0x007F) {
            $utf8 .= chr($c);
        } elseif ($c > 0xFFFF) {
            $utf8 .= chr(0xF0 | (($c >> 18) & 0x07));
            $utf8 .= chr(0x80 | (($c >> 12) & 0x3F));
            $utf8 .= chr(0x80 | (($c >>  6) & 0x3F));
            $utf8 .= chr(0x80 | ($c & 0x3F));
        } elseif ($c > 0x07FF) {
            $utf8 .= chr(0xE0 | (($c >> 12) & 0x0F));
            $utf8 .= chr(0x80 | (($c >>  6) & 0x3F));
            $utf8 .= chr(0x80 | ($c & 0x3F));
        } else {
            // NOTE(review): U+0000 lands here too and comes out as C0 80 rather than a
            // single zero byte. That is how this function has always behaved; kept as-is.
            $utf8 .= chr(0xC0 | (($c >>  6) & 0x1F));
            $utf8 .= chr(0x80 | ($c & 0x3F));
        }
    }
    return $utf8;
}

/*
You know that unreadable garbage Windows turns multibyte-character filenames into? The stuff
that looks like little boxes made of ticky-tacky and question marks and miscellaneous
symbols that usually show up in file names? This function will, I *hope*, convert that
into readable actual multibytes. It will only work on Japanese filenames entered by
Japanese people, for now... that's what's stored by their Windows boxes in Shift-JIS encoding.
*/
function WinCrap2Utf8($str) {
    // Tries to turn a mangled Shift-JIS filename back into readable UTF-8.
    //   $str - UTF-8 text whose individual characters are really the bytes of a
    //          Shift-JIS string.
    // Returns the converted text, or '' when $str cannot be decoded as UTF-8.
    //
    // mb_convert_encoding() with an encoding list sometimes fails to detect SJIS, so
    // the pairing of lead and trail bytes is done here by hand: a value in
    // 0x81-0x9F or 0xE0-0xFC is held back and converted together with the character
    // that follows it; everything else is converted on its own.
    // NOTE(review): each value is turned back into a character with Codepoint2Utf8(),
    // which for values of 0x80 and up produces a two-byte UTF-8 sequence rather than
    // the single raw byte - confirm that this is what the SJIS conversion should see.
    $codepoints=Utf8StringToCodepoints($str);
    if (!is_array($codepoints)) {
        // The decoder reports invalid UTF-8 with FALSE; there is nothing to walk then.
        return '';
    }
    $out=$lead='';
    foreach ($codepoints as $dec) {
        $char=Codepoint2Utf8($dec);
        if ($lead) {
            // Second half of a two-byte character: convert the pair together.
            $out .= @mb_convert_encoding($lead . $char, "UTF-8", "SJIS");
            $lead='';
            continue;
        }
        if (($dec >= 0x81 && $dec <= 0x9F) || ($dec >= 0xE0 && $dec <= 0xFC)) {
            // Combine with the next char and convert *that*.
            $lead=$char;
            continue;
        }
        $out .= @mb_convert_encoding($char, "UTF-8", "SJIS");
    }
    return $out;
}

/*
 * All the remaining functions were... borrowed from something called "php-utf8"
 * by Henri Sivonen. I can't even remember where I found it, but it's GPL so yay.
 * Oh; I adapted Codepoint2Utf8Hex() out of Codepoint2Utf8(), and adapted the
 * Utf8toCodepoint() function from Utf8StringToCodepoints() to accept a hex
 * string as input instead of a binary string of UTF8 characters.
 */

function Codepoint2Utf8($arr) {
  // Encodes one codepoint, or an array of them, as an actual UTF-8 string.
  //   $arr - an integer codepoint or an array of integer codepoints (pass real
  //          integers, not strings; 16 and 0x10 are of course the same thing).
  // Returns the UTF-8 bytes. A lone falsy value (0 included) gives ''. A BOM (0xFEFF)
  // is dropped from the output. Returns FALSE as soon as a codepoint is negative, a
  // surrogate (0xD800-0xDFFF) or above 0x10FFFF.
  if (!is_array($arr)) {
    if (!$arr) {
      return '';
    }
    $arr = array($arr);
  }
  $utf8 = '';
  foreach ($arr as $cp) {
    if ($cp < 0) {
      return FALSE;
    }
    if ($cp <= 0x007f) {
      // One byte: plain ASCII.
      $utf8 .= chr($cp);
      continue;
    }
    if ($cp <= 0x07ff) {
      // Two bytes.
      $utf8 .= chr(0xc0 | ($cp >> 6)) . chr(0x80 | ($cp & 0x003f));
      continue;
    }
    if ($cp == 0xFEFF) {
      // The BOM is legal but never emitted.
      continue;
    }
    if ($cp >= 0xD800 && $cp <= 0xDFFF) {
      // Surrogates are not characters.
      return FALSE;
    }
    if ($cp <= 0xffff) {
      // Three bytes.
      $utf8 .= chr(0xe0 | ($cp >> 12))
             . chr(0x80 | (($cp >> 6) & 0x003f))
             . chr(0x80 | ($cp & 0x003f));
      continue;
    }
    if ($cp <= 0x10ffff) {
      // Four bytes.
      $utf8 .= chr(0xf0 | ($cp >> 18))
             . chr(0x80 | (($cp >> 12) & 0x3f))
             . chr(0x80 | (($cp >> 6) & 0x3f))
             . chr(0x80 | ($cp & 0x3f));
      continue;
    }
    // Beyond the Unicode range.
    return FALSE;
  }
  return $utf8;
}

function Codepoint2Utf8Hex($arr=FALSE) {
  // Returns a HEX REPRESENTATION of the UTF-8 bytes for one codepoint or an array of them.
  //   $arr - an integer codepoint or an array of integer codepoints (real integers,
  //          not strings). FALSE (the default) means "nothing given".
  // Returns an uppercase hex string with two digits per UTF-8 byte, e.g. 0x20AC gives
  // "E282AC" and array(0x41, 0x20AC) gives "41E282AC". Returns '' when called with
  // FALSE, and FALSE as soon as a codepoint is negative, a surrogate or above 0x10FFFF.
  // A BOM (0xFEFF) contributes nothing; if nothing at all was produced (empty array,
  // or only a BOM) the result is the hex for a zero byte, as it has always been.
  //
  // The hex is assembled byte by byte and appended per codepoint, so every element of
  // an array shows up in the result (not just the last one) and no intermediate value
  // ever needs more than eight bits.
  if (!is_array($arr)) {
    if ($arr === FALSE) {
      return '';
    }
    $arr = array($arr);
  }
  $hex = '';
  foreach ($arr as $src) {
    if ($src < 0) {
      return FALSE;
    } else if ($src <= 0x007f) {
      $bytes = array($src);
    } else if ($src <= 0x07ff) {
      $bytes = array(
        0xc0 | ($src >> 6),
        0x80 | ($src & 0x003f),
      );
    } else if ($src == 0xFEFF) {
      // nop -- zap the BOM
      $bytes = array();
    } else if ($src >= 0xD800 && $src <= 0xDFFF) {
      // found a surrogate
      return FALSE;
    } else if ($src <= 0xffff) {
      $bytes = array(
        0xe0 | ($src >> 12),
        0x80 | (($src >> 6) & 0x003f),
        0x80 | ($src & 0x003f),
      );
    } else if ($src <= 0x10ffff) {
      $bytes = array(
        0xf0 | ($src >> 18),
        0x80 | (($src >> 12) & 0x3f),
        0x80 | (($src >> 6) & 0x3f),
        0x80 | ($src & 0x3f),
      );
    } else {
      // out of range
      return FALSE;
    }
    foreach ($bytes as $byte) {
      $hex .= hexpad(dechex($byte), 2);
    }
  }
  if ($hex === '') {
    $hex = hexpad(dechex(0), 2);
  }
  return strtoupper($hex);
}

function Utf8toCodepoint($str) {
  // Decodes a HEX STRING of UTF-8 bytes (e.g. "EF8899") into codepoints. Reverse of Codepoint2Utf8Hex().
  //   $str - hex digits, two per byte; it is cleaned with HexOnly() first.
  // Returns an array with one entry per character. Each entry is the codepoint as an
  // uppercase hex string padded by hexpad() - NOT a decimal integer. A BOM is skipped.
  // Returns false for anything that is not well-formed UTF-8: a stray or missing
  // continuation byte, an over-long form, a surrogate, a value above 0x10FFFF, or a
  // sequence that is cut off at the end of the input.
  $mState = 0;     // cached expected number of octets after the current octet
                   // until the beginning of the next UTF8 character sequence
  $mUcs4  = 0;     // cached Unicode character
  $mBytes = 1;     // cached expected number of octets in the current sequence

  $out = array();
  $str = HexOnly($str);
  $len = strlen($str);
  // Walk the string one byte (= two hex digits) at a time. Stepping by two keeps the
  // loop inside the string; reading past the end would decode as extra zero bytes.
  for ($i = 0; $i < $len; $i += 2) {
    $in = hexdec(substr($str, $i, 2));
    if ($mState == 0) {
      // When mState is zero we expect either a US-ASCII character or a multi-octet sequence.
      if (0 == (0x80 & $in)) {
        // US-ASCII, pass straight through.
        $out[] = strtoupper(hexpad(dechex($in)));
        $mBytes = 1;
      } else if (0xC0 == (0xE0 & $in)) {
        // First octet of 2 octet sequence
        $mUcs4 = ($in & 0x1F) << 6;
        $mState = 1;
        $mBytes = 2;
      } else if (0xE0 == (0xF0 & $in)) {
        // First octet of 3 octet sequence
        $mUcs4 = ($in & 0x0F) << 12;
        $mState = 2;
        $mBytes = 3;
      } else if (0xF0 == (0xF8 & $in)) {
        // First octet of 4 octet sequence
        $mUcs4 = ($in & 0x07) << 18;
        $mState = 3;
        $mBytes = 4;
      } else if (0xF8 == (0xFC & $in)) {
        /* First octet of 5 octet sequence.
         * This is illegal because the encoded codepoint must be either
         * (a) not the shortest form or
         * (b) outside the Unicode range of 0-0x10FFFF.
         * Rather than trying to resynchronize, we will carry on until the end
         * of the sequence and let the later error handling code catch it.
         */
        $mUcs4 = ($in & 0x03) << 24;
        $mState = 4;
        $mBytes = 5;
      } else if (0xFC == (0xFE & $in)) {
        // First octet of 6 octet sequence, see comments for 5 octet sequence.
        $mUcs4 = ($in & 1) << 30;
        $mState = 5;
        $mBytes = 6;
      } else {
        /* Current octet is neither in the US-ASCII range nor a legal first
         * octet of a multi-octet sequence.
         */
        return false;
      }
    } else {
      // When mState is non-zero, we expect a continuation of the multi-octet sequence
      if (0x80 == (0xC0 & $in)) {
        // Legal continuation.
        $shift = ($mState - 1) * 6; // 0 for the last octet of the sequence
        $mUcs4 |= ($in & 0x0000003F) << $shift;
        if (0 == --$mState) {
          /* End of the multi-octet sequence.
           * mUcs4 now contains the final Unicode codepoint to be output.
           * Check for illegal sequences and codepoints.
           */
          // From Unicode 3.1, non-shortest form is illegal
          if (((2 == $mBytes) && ($mUcs4 < 0x0080)) ||
              ((3 == $mBytes) && ($mUcs4 < 0x0800)) ||
              ((4 == $mBytes) && ($mUcs4 < 0x10000)) ||
              (4 < $mBytes) ||
              // From Unicode 3.2, surrogate characters are illegal
              (($mUcs4 & 0xFFFFF800) == 0xD800) ||
              // Codepoints outside the Unicode range are illegal
              ($mUcs4 > 0x10FFFF)) {
            return false;
          }
          if (0xFEFF != $mUcs4) {
            // BOM is legal but we don't want to output it.
            $out[] = strtoupper(hexpad(dechex($mUcs4)));
          }
          //initialize UTF8 cache
          $mState = 0;
          $mUcs4  = 0;
          $mBytes = 1;
        }
      } else {
        /* ((0xC0 & (*in) != 0x80) && (mState != 0))
         *
         * Incomplete multi-octet sequence.
         */
        return false;
      }
    }
  }
  if ($mState != 0) {
    // The input ended in the middle of a multi-octet sequence.
    return false;
  }
  return $out;
}

function Utf8StringToCodepoints($str) {
  // Decodes a binary UTF-8 string into its codepoints.
  //   $str - the UTF-8 bytes.
  // Returns an array with one integer codepoint per character; a BOM is skipped.
  // Returns false when the data is not well-formed UTF-8: a stray or missing
  // continuation byte, an over-long form, a surrogate, or a value above 0x10FFFF.
  // A sequence that is cut off at the very end of $str is not reported; the
  // characters decoded up to that point are returned.
  $mState = 0;     // cached expected number of octets after the current octet
                   // until the beginning of the next UTF8 character sequence
  $mUcs4  = 0;     // cached Unicode character
  $mBytes = 1;     // cached expected number of octets in the current sequence

  $out = array();

  $len = strlen($str);
  for ($i = 0; $i < $len; $i++) {
    // Bracket offsets work on both old and new PHP (curly-brace offsets do not).
    $in = ord($str[$i]);
    if (0 == $mState) {
      // When mState is zero we expect either a US-ASCII character or a multi-octet sequence.
      if (0 == (0x80 & $in)) {
        // US-ASCII, pass straight through.
        $out[] = $in;
        $mBytes = 1;
      } else if (0xC0 == (0xE0 & $in)) {
        // First octet of 2 octet sequence
        $mUcs4 = ($in & 0x1F) << 6;
        $mState = 1;
        $mBytes = 2;
      } else if (0xE0 == (0xF0 & $in)) {
        // First octet of 3 octet sequence
        $mUcs4 = ($in & 0x0F) << 12;
        $mState = 2;
        $mBytes = 3;
      } else if (0xF0 == (0xF8 & $in)) {
        // First octet of 4 octet sequence
        $mUcs4 = ($in & 0x07) << 18;
        $mState = 3;
        $mBytes = 4;
      } else if (0xF8 == (0xFC & $in)) {
        /* First octet of 5 octet sequence.
         *
         * This is illegal because the encoded codepoint must be either
         * (a) not the shortest form or
         * (b) outside the Unicode range of 0-0x10FFFF.
         * Rather than trying to resynchronize, we will carry on until the end
         * of the sequence and let the later error handling code catch it.
         */
        $mUcs4 = ($in & 0x03) << 24;
        $mState = 4;
        $mBytes = 5;
      } else if (0xFC == (0xFE & $in)) {
        // First octet of 6 octet sequence, see comments for 5 octet sequence.
        $mUcs4 = ($in & 1) << 30;
        $mState = 5;
        $mBytes = 6;
      } else {
        /* Current octet is neither in the US-ASCII range nor a legal first
         * octet of a multi-octet sequence.
         */
        return false;
      }
    } else {
      // When mState is non-zero, we expect a continuation of the multi-octet sequence
      if (0x80 == (0xC0 & $in)) {
        // Legal continuation.
        $shift = ($mState - 1) * 6;
        $mUcs4 |= ($in & 0x0000003F) << $shift;

        if (0 == --$mState) {
          /* End of the multi-octet sequence. mUcs4 now contains the final
           * Unicode codepoint to be output
           *
           * Check for illegal sequences and codepoints.
           */

          // From Unicode 3.1, non-shortest form is illegal
          if (((2 == $mBytes) && ($mUcs4 < 0x0080)) ||
              ((3 == $mBytes) && ($mUcs4 < 0x0800)) ||
              ((4 == $mBytes) && ($mUcs4 < 0x10000)) ||
              (4 < $mBytes) ||
              // From Unicode 3.2, surrogate characters are illegal
              (($mUcs4 & 0xFFFFF800) == 0xD800) ||
              // Codepoints outside the Unicode range are illegal
              ($mUcs4 > 0x10FFFF)) {
            return false;
          }
          if (0xFEFF != $mUcs4) {
            // BOM is legal but we don't want to output it
            $out[] = $mUcs4;
          }
          //initialize UTF8 cache
          $mState = 0;
          $mUcs4  = 0;
          $mBytes = 1;
        }
      } else {
        /* ((0xC0 & (*in) != 0x80) && (mState != 0))
         *
         * Incomplete multi-octet sequence.
         */
        return false;
      }
    }
  }
  return $out;
}
?>