From 5fc644162637796667d417907f373e5e8c7b2acb Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Wed, 20 Jun 2018 22:29:17 +0100 Subject: [PATCH] Paper: restructured the IRE construction example. --- re2c/doc/tdfa_v2/TODO | 43 ++++++ re2c/doc/tdfa_v2/img/Makefile | 2 +- re2c/doc/tdfa_v2/img/mark_enum.tex | 204 +++++++++++++++++++++++++++++ re2c/doc/tdfa_v2/part_1_tnfa.tex | 31 +++-- 4 files changed, 268 insertions(+), 12 deletions(-) create mode 100644 re2c/doc/tdfa_v2/img/mark_enum.tex diff --git a/re2c/doc/tdfa_v2/TODO b/re2c/doc/tdfa_v2/TODO index 92058d78..cc868f5e 100644 --- a/re2c/doc/tdfa_v2/TODO +++ b/re2c/doc/tdfa_v2/TODO @@ -12,3 +12,46 @@ Rephrase that paragrapgh to say that the minimal tree for fully-tagged trees is included in the class of minimal trees for partially-tagged trees. + + +> The definition of lastht() and first() mentions the case in +> which the argument begins with a letter. +> However, they seems to be applied only to frames (that do not +> contain letters). Are there cases in +> which they are applied to something other than frames? + + +> P.S. I am still a bit uncertain about the usefulness of the +> subdivision in frames of PEs. Its usefulness is +> obvious in the case of Okui's PAT because them have sequences of +> parentheses on the arcs, corresponding +> to frames (and have no closure to be done). But in our case we +> have GOR1 that compares fragments +> of PEs that are not frames, and still uses a notion of rho that +> is somehow different. +> So, my question is how will we make use of frames in our context? + + + + + +> I was thinking how to demonstrate Theorem 1. This is an attempt to do it. +> +> Taken two distinct trees s and t, since the norms are defined over +> Pos(s) union Pos(t), +..well, strictly speaking, norms are defined everywhere: that's one of the differences with Okui definition of the norm, and it comes in really handy in the proof of theorem 2 (the one that states that partial order on IRE is a strict weak order): without that extension of the norm to the infinite set of possible positions, the proofs get twice as long (because we have to track domains everywhere and prove obvious things that follow from fact that ||s||_p = ||t||_p if p is not in Pos(s) and not in Pos(t)). +> then it is either s < t or t < s (i.e. the ordering is total). +> Suppose it is s < t. Then, there is a position p for which ||s||_p > +> ||t||_p and all preceding +> positions have equal norms. Clearly, it cannot also be t < s. +..agreed.. +> Suppose it is t < s, then for the same reason, it cannot be s < t. +> Then, the ordering is strict. +..this is, I think, not needed: we just need to say, "without loss of generality, suppose that s < t". +I will put that in the paper. + + + +OKUI SLIDES!!! + + diff --git a/re2c/doc/tdfa_v2/img/Makefile b/re2c/doc/tdfa_v2/img/Makefile index 00576408..c2933cfa 100644 --- a/re2c/doc/tdfa_v2/img/Makefile +++ b/re2c/doc/tdfa_v2/img/Makefile @@ -1,7 +1,7 @@ %.pdf : %.tex lualatex -shell-escape $< $<.build_log -all : trees.pdf tnfa.pdf gor1.pdf +all : mark_enum.pdf trees.pdf tnfa.pdf gor1.pdf clean: rm *.log *.build_log *.aux *.pdf *.pag diff --git a/re2c/doc/tdfa_v2/img/mark_enum.tex b/re2c/doc/tdfa_v2/img/mark_enum.tex new file mode 100644 index 00000000..27cbe1c3 --- /dev/null +++ b/re2c/doc/tdfa_v2/img/mark_enum.tex @@ -0,0 +1,204 @@ + +\documentclass[tikz,border=10pt]{standalone} + + +\RequirePackage{luatex85} +\usepackage[utf8]{inputenc} +\usepackage{amsmath, amssymb, amsfonts, accents} +\usetikzlibrary{graphdrawing, graphs, arrows, shapes, automata, calc} +\usegdlibrary{trees, layered} +\usepackage{stix} + + +\newcommand{\Xund}{\rule{.4em}{.4pt}} +\newcommand{\IRE}{I\!RE} + + +\begin{document} + +\begin{tikzpicture}[>=stealth, ->, auto, node distance=0.2in] + +\tikzstyle{every node}=[draw=none] + +% $(\epsilon|a^{0,\infty})(a|\epsilon)^{0,\infty}$ + +\begin{scope}[xshift=-1.1in] + \node (a) {\small{ + $\begin{aligned} + & mark ( (\epsilon|a^{0,\infty})(\epsilon|a)^{0,\infty} ) ) = \big[ \\ + % + & \quad mark ( (\epsilon|a^{0,\infty}) ) = \big[ \\ + & \quad mark ( \epsilon|a^{0,\infty} ) = \big[ \\ + & \quad\quad\quad mark ( \epsilon ) = (0,0,\epsilon), \\ + & \quad\quad\quad mark ( a^{0,\infty} ) = \big[ \\ + & \quad\quad\quad\quad mark ( a ) = (0,0,a) \\ + & \quad\quad\quad \big] = (0,0,(0,0,a)^{0,\infty}) \\ + & \quad\quad \big] = (0,0,(0,0,\epsilon) \mid (0,0,(0,0,a)^{0,\infty})) \\ + & \quad \big] = (1,1,(0,0,\epsilon)|(0,0,(0,0,a)^{0,\infty})) \\ + % + & \quad mark ( (\epsilon|a)^{0,\infty} ) = \big[ \\ + & \quad\quad mark ( (\epsilon|a) ) = \big[ \\ + & \quad\quad\quad mark ( \epsilon|a ) = \big [ \\ + & \quad\quad\quad\quad mark ( \epsilon ) = (0,0,\epsilon) \\ + & \quad\quad\quad\quad mark ( a ) = (0,0,a) \\ + & \quad\quad\quad \big] = (0,0,(0,0,\epsilon)|(0,0,a)) \\ + & \quad\quad \big] = (1,1,(0,0,\epsilon) \mid (0,0,a)) \\ + & \quad \big] = (1,0,(1,1,(0,0,\epsilon) \mid (0,0,a))^{0,\infty}) \\ + % + & \big] = (1,1, + (1,1,(0,0,\epsilon) \mid (0,0,(0,0,a)^{0,\infty})) \\ + & \quad\quad\quad \cdot (1,0,(1,1,(0,0,\epsilon) \mid (0,0,a))^{0,\infty}) + ) + \end{aligned}$ + }}; +\end{scope} + +\begin{scope}[xshift=1.6in, yshift=0.3in] + \node (a) {\small{ + $\begin{aligned} + & enum (1,1,(1,1, + (1,1,(0,0,\epsilon) \mid (0,0,(0,0,a)^{0,\infty})) \\ + & \quad\quad\quad\quad\quad \cdot (1,0,(1,1,(0,0,\epsilon) \mid (0,0,a))^{0,\infty}))) = \big[ \\ + & \quad enum (2,2,(1,1,(0,0,\epsilon) \mid (0,0,(0,0,a)^{0,\infty}))) = \big[ \\ + & \quad\quad enum (3,3,(0,0,\epsilon)) = (3,3,(0,0,\epsilon)) \\ + & \quad\quad enum (3,3,(0,0,(0,0,a)^{0,\infty})) = \big[ \\ + & \quad\quad\quad enum (3,3,(0,0,a)) = (3,3,(0,0,a)) \\ + & \quad\quad \big] = (3,3,(0,0,(0,0,a)^{0,\infty})) \\ + & \quad \big] = (3,3,(2,2,(0,0,\epsilon) \mid (0,0,(0,0,a)^{0,\infty}))) \\ + % + & \quad enum (3,3, (1,0,(1,1,(0,0,\epsilon) \mid (0,0,a))^{0,\infty}) ) = \big[ \\ + & \quad\quad enum (4,3, (1,1,(0,0,\epsilon) \mid (0,0,a)) ) = \big[ \\ + & \quad\quad\quad enum (5,4, (0,0,\epsilon) ) = (0,0,\epsilon) \\ + & \quad\quad\quad enum (5,4, (0,0,a) ) = (0,0,a) \\ + & \quad\quad \big] = (5,4,(4,3,(0,0,\epsilon) \mid (0,0,a))) \\ + & \quad \big] = (5,4,(3,0,(4,3,(0,0,\epsilon) \mid (0,0,a))^{0,\infty})) \\ + % + & \big] = (5,4,(1,1, + (2,2,(0,0,\epsilon) \mid (0,0,(0,0,a)^{0,\infty})) \\ + & \quad\quad\quad\quad\quad \cdot (3,0,(4,3,(0,0,\epsilon) \mid (0,0,a))^{0,\infty}) + )) + \end{aligned}$ + }}; +\end{scope} + +\begin{scope}[xshift=0.3in, yshift=-2.2in] + \node[xshift=0in, draw=none] + {\small{$ + \IRE ((\epsilon|a^{0,\infty})(\epsilon|a)^{0,\infty} )) = (1,1,(2,2,(0,0,\epsilon) \mid (0,0,(0,0,a)^{0,\infty})) + \cdot (3,0,(4,3,(0,0,\epsilon) \mid (0,0,a))^{0,\infty})) + $}}; +\end{scope} + +\begin{scope}[xshift=0.3in, yshift=-2.6in] + \graph [tree layout, grow=down, fresh nodes] { + "${(1, 1, \cdot)}_{\Lambda}$" -- { + "${(2, 2, |)}_{1}$" -- { + "${(0, 0, \epsilon)}_{1.1}$"[draw=none], + "${(0, 0, \{0,\infty\})}_{1.2}$"[draw=none] -- { + "${(0, 0, a)}_{1.2.1}$"[draw=none] + } + }, + "${(3, 0, \{0,\infty\})}_{2}$" -- { + "${(4, 3, |)}_{2.1}$" -- { + "${(0, 0, a)}_{2.1.1}$"[draw=none], + "${(0, 0, \epsilon)}_{2.1.2}$"[draw=none] + } + } + } + }; +\end{scope} + +\begin{scope}[xshift=-2in, yshift=-4.2in] + \node[xshift=-0.3in, draw=none] {$s:$}; + \graph [tree layout, grow=down, fresh nodes] { + "${T}^{1}_{\Lambda}$" -- { + "${T}^{2}_{1}$" -- { + "${\varnothing}^{0}_{1.1}$"[draw=none], + "${T}^{0}_{1.2}$"[draw=none] -- { + "${a}^{0}_{1.2.1}$"[draw=none] + } + }, + "${T}^{3}_{2}$" -- { + "${\varnothing}^{4}_{2.1}$" + } + } + }; + \node[yshift=-1.5in, draw=none] + {\small{${T}^{1} \big( + {T}^{2} \big( + {\varnothing}^{0}, + {T}^{0}({a}^{0}) + \big), + {T}^{3}( + {\varnothing}^{4} + ) + \big)$}}; +\end{scope} + +\begin{scope}[xshift=0in, yshift=-4.2in] + \node[xshift=-0.3in, draw=none] {$t:$}; + \graph [tree layout, grow=down, fresh nodes] { + "${T}^{1}_{\Lambda}$" -- { + "${T}^{2}_{1}$" -- { + "${\varnothing}^{0}_{1.1}$"[draw=none], + "${T}^{0}_{1.2}$"[draw=none] -- { + "${\varnothing}^{0}_{1.2.1}$"[draw=none] + } + }, + "${T}^{3}_{2}$" -- { + "${T}^{4}_{2.1}$" -- { + "${a}^{0}_{2.1.1}$"[draw=none], + "${\varnothing}^{0}_{2.1.2}$"[draw=none] + } + } + } + }; + \node[yshift=-1.5in, draw=none] + {\small{${T}^{1}\big( + {T}^{2}( + {\varnothing}^{0}, + {T}^{0}({\varnothing}^{0}) + ), + {T}^{3}\big( + {T}^{4}({a}^{0},{\varnothing}^{0}) + \big) + \big)$}}; +\end{scope} + +\begin{scope}[xshift=2.2in, yshift=-4.2in] + \node[xshift=-0.3in, draw=none] {$u:$}; + \graph [tree layout, grow=down, fresh nodes] { + "${T}^{1}_{\Lambda}$" -- { + "${T}^{2}_{1}$" -- { + "${\epsilon}^{0}_{1.1}$"[draw=none], + "${\varnothing}^{0}_{1.2}$"[draw=none] + }, + "${T}^{3}_{2}$" -- { + "${T}^{4}_{2.1}$" -- { + "${a}^{0}_{2.1.1}$"[draw=none], + "${\varnothing}^{0}_{2.1.2}$"[draw=none] + }, + "${T}^{4}_{2.2}$" -- { + "${\varnothing}^{0}_{2.2.1}$"[draw=none], + "${\epsilon}^{0}_{2.2.2}$"[draw=none] + } + } + } + }; + \node[xshift=0.2in, yshift=-1.5in, draw=none] + {\small{${T}^{1}\big( + {T}^{2}( + {\epsilon}^{0}, + {\varnothing}^{0} + ), + {T}^{3}\big( + {T}^{4}({a}^{0},{\varnothing}^{0}), + {T}^{4}({\varnothing}^{0}, {\epsilon}^{0}) + \big) + \big)$}}; +\end{scope} + +\end{tikzpicture} + +\end{document} + diff --git a/re2c/doc/tdfa_v2/part_1_tnfa.tex b/re2c/doc/tdfa_v2/part_1_tnfa.tex index 69f6ca9b..4745ecd6 100644 --- a/re2c/doc/tdfa_v2/part_1_tnfa.tex +++ b/re2c/doc/tdfa_v2/part_1_tnfa.tex @@ -35,6 +35,7 @@ \usepackage{enumitem} \usepackage[justification=centering]{caption} \usepackage{url} +\usepackage[section]{placeins} \newcommand{\Xl}{\langle} \newcommand{\Xr}{\rangle} @@ -402,12 +403,15 @@ $\IRE(e) = r$ where $(\Xund, \Xund, r) = enum(1, 1, mark(e))$. \end{aligned} \end{align*} +An example of constructing an IRE from a RE is given on figure \ref{fig_mark_enum}. The reverse transformation is also possible: we can transform IRE back to RE by erasing all indices and adding parentheses around subexpressions with nonzero explicit submatch index. Therefore RE and IRE are equivalent representations. -% +\\ + +\iffalse Below is an example of IRE construction for RE $(\epsilon|a^{0,\infty})(\epsilon|a)^{0,\infty} )$ (the resulting IRE can be seen on figure \ref{fig_parse_trees}): @@ -463,6 +467,17 @@ Below is an example of IRE construction for RE $(\epsilon|a^{0,\infty})(\epsilon )) \end{aligned} \end{align*} +\fi + +%\iffalse +\begin{figure}\label{fig_mark_enum} +\includegraphics[width=\linewidth]{img/mark_enum.pdf} +\caption{ +The process of constructing IRE for RE $(\epsilon|a^{0,\infty})(\epsilon|a)^{0,\infty} )$ using $mark$ and $enum$,\\ +the resulting IRE and examples of IPT for it. +} +\end{figure} +%\fi Just like REs denote sets of PTs, IREs denote sets of \emph{IPTs} --- \emph{indexed parse trees}. The only difference between PTs and IPTs is that each node of an IPT has an associated number --- @@ -481,16 +496,6 @@ Explicit submatch index is not used in IPTs. \end{enumerate} \end{Xdef} -\iffalse -\begin{figure}\label{fig_mark_enum} -\includegraphics[width=\linewidth]{img/mark_enum.pdf} -\caption{ -The process of constructing IRE for RE $(\epsilon|a^{0,\infty})(\epsilon|a)^{0,\infty} )$ -and the result visualized as a tree. -} -\end{figure} -\fi - The operator $\IPT: \XIR_\Sigma \rightarrow 2^{\XIT_\Sigma}$ gives the set of all IPTs denoted by the given IRE: %(its definition is very similar to the one of operator $PT$): \begin{align*} @@ -521,6 +526,9 @@ is the subset of $Pos(t)$ containing positions of subtrees with nonzero submatch $Sub(t) = \{ p \mid \exists t|_p = s^i \text{ such that } i \neq 0 \}$. Examples of IPT can be seen on figure \ref{fig_parse_trees}. +\FloatBarrier + +\iffalse \begin{figure}\label{fig_parse_trees} \includegraphics[width=\linewidth]{img/trees.pdf} \caption{ @@ -535,6 +543,7 @@ $s <_1 u$, $u <_{1.1} t$. } \end{figure} +\fi % \begin{Xdef} % \emph{Lexicographic order on positions.} -- 2.40.0