Commit 2a92566f authored by Martin Jonáš's avatar Martin Jonáš
Browse files

Rewrite and finish CDCL

parent c2053bb0
Loading
Loading
Loading
Loading
+100 −0
Original line number Original line Diff line number Diff line
@@ -92,3 +92,103 @@
  pages     = {343--356},
  pages     = {343--356},
  year      = {2011}
  year      = {2011}
}
}

@inproceedings{ZM88,
  author    = {Ramin Zabih and
               David A. McAllester},
  title     = {A Rearrangement Search Strategy for Determining Propositional Satisfiability},
  booktitle = {Proceedings of the 7th National Conference on Artificial Intelligence.
               St. Paul, MN, August 21-26, 1988.},
  pages     = {155--160},
  year      = {1988}
}

@article{BKS04,
  author    = {Paul Beame and
               Henry A. Kautz and
               Ashish Sabharwal},
  title     = {Towards Understanding and Harnessing the Potential of Clause Learning},
  journal   = {J. Artif. Intell. Res. {(JAIR)}},
  volume    = {22},
  pages     = {319--351},
  year      = {2004}
}

@inproceedings{MMZZM01,
  author    = {Matthew W. Moskewicz and
               Conor F. Madigan and
               Ying Zhao and
               Lintao Zhang and
               Sharad Malik},
  title     = {Chaff: Engineering an Efficient {SAT} Solver},
  booktitle = {Proceedings of the 38th Design Automation Conference, {DAC} 2001,
               Las Vegas, NV, USA, June 18-22, 2001},
  pages     = {530--535},
  year      = {2001}
}

@inproceedings{LGPC16,
  author    = {Jia Hui Liang and
               Vijay Ganesh and
               Pascal Poupart and
               Krzysztof Czarnecki},
  title     = {Learning Rate Based Branching Heuristic for {SAT} Solvers},
  booktitle = {Theory and Applications of Satisfiability Testing -- {SAT} 2016 -- 19th
               International Conference, Bordeaux, France, July 5-8, 2016, Proceedings},
  pages     = {123--140},
  year      = {2016}
}

@inproceedings{BF15branching,
  author    = {Armin Biere and
               Andreas Fr{\"{o}}hlich},
  title     = {Evaluating {CDCL} Variable Scoring Schemes},
  booktitle = {Theory and Applications of Satisfiability Testing -- {SAT} 2015 -- 18th
               International Conference, Austin, TX, USA, September 24-27, 2015,
               Proceedings},
  pages     = {405--422},
  year      = {2015}
}

@inproceedings{BF15restarts,
  author    = {Armin Biere and
               Andreas Fr{\"{o}}hlich},
  title     = {Evaluating {CDCL} Restart Schemes},
  booktitle = {Workshop on Pragmatics of SAT -- {POS'15}, Austin, TX,
                  USA, September 24-27, 2015, Proceedings},
  pages     = {405--422},
  year      = {2015}
}

@inproceedings{GSK98,
  author    = {Carla P. Gomes and
               Bart Selman and
               Henry A. Kautz},
  title     = {Boosting Combinatorial Search Through Randomization},
  booktitle = {Proceedings of the Fifteenth National Conference on Artificial Intelligence
               and Tenth Innovative Applications of Artificial Intelligence Conference,
               {AAAI} 98, {IAAI} 98, July 26-30, 1998, Madison, Wisconsin, {USA.}},
  pages     = {431--437},
  year      = {1998}
}

@article{LSZ93,
  author    = {Michael Luby and
               Alistair Sinclair and
               David Zuckerman},
  title     = {Optimal Speedup of Las Vegas Algorithms},
  journal   = {Inf. Process. Lett.},
  volume    = {47},
  number    = {4},
  pages     = {173--180},
  year      = {1993}
}

@incollection{HM09,
  author    = {Marijn Heule and
               Hans van Maaren},
  title     = {Look-Ahead Based {SAT} Solvers},
  booktitle = {Handbook of Satisfiability},
  pages     = {155--184},
  year      = {2009}
}
 No newline at end of file
+120 −85
Original line number Original line Diff line number Diff line
@@ -15,6 +15,7 @@ definition is more suitable for the bit-vector theory, which will be
introduced in the later parts of this chapter.
introduced in the later parts of this chapter.


\subsection{Propositional formulas, assignments, and satisfaction}
\subsection{Propositional formulas, assignments, and satisfaction}
\label{prelim:prop}


Let $\P$ be a fixed finite set of propositional variables. For every
Let $\P$ be a fixed finite set of propositional variables. For every
variable $x \in \P$ there are two literals -- a \emph{positive
variable $x \in \P$ there are two literals -- a \emph{positive
@@ -27,7 +28,10 @@ the \emph{conjunctive normal form} (\cnf) is a finite conjunction of
clauses. If convinient, we will use idempotence and commutativity of
clauses. If convinient, we will use idempotence and commutativity of
disjunction and view clauses as sets of literals and therefore ignore
disjunction and view clauses as sets of literals and therefore ignore
the order and multiple occurences of literals. Similarly, if
the order and multiple occurences of literals. Similarly, if
convinient, we will view \cnf formulas as sets of clauses.
convinient, we will view \cnf formulas as sets of clauses. For
example, we will write the formula
$(x \vee \overline{y}) \wedge (\overline{x} \vee z)$ as the set
$\{ \{ x , \overline{y} \}, \{ \overline{x} \vee z \} \}$.


A \emph{partial assignment} $M$ is a set of literals which does not
A \emph{partial assignment} $M$ is a set of literals which does not
contain complementary literals, i.e.
contain complementary literals, i.e.
@@ -39,9 +43,13 @@ asignment $M$ \emph{total} over $\P$ if all literals of $\P$ are
defined in $M$. A clause is \emph{true} in $M$ if at least one of its
defined in $M$. A clause is \emph{true} in $M$ if at least one of its
literals is true in $M$ and a \cnf formula is \emph{true} in $M$ if
literals is true in $M$ and a \cnf formula is \emph{true} in $M$ if
all of its clauses are true in $M$. Clause that is false for a given
all of its clauses are true in $M$. Clause that is false for a given
assignment $M$ is called a \emph{conflict clause} for $M$. For a clause
assignment $M$ is called a \emph{conflict clause} for $M$. If all
$C = x_1 \vee \ldots x_n$, the notation $\neg C$ stands for the
literals of a clause are false except of one literal that is
formula $\neg x_1 \wedge \ldots \wedge x_n$.
undefined, the clause is called \emph{unit}.

%For a clause
%$C = x_1 \vee \ldots x_n$, the notation $\neg C$ stands for the
%formula $\neg x_1 \wedge \ldots \wedge x_n$.


If a formula $F$ is true in $M$, we call $M$ a \emph{model} of $F$ and
If a formula $F$ is true in $M$, we call $M$ a \emph{model} of $F$ and
denote it as $M \models F$. A formula is \emph{satisfiable} if it has
denote it as $M \models F$. A formula is \emph{satisfiable} if it has
@@ -52,7 +60,35 @@ $F \models F'$. Formulas $F$ and $F'$ are called
\emph{equisatisfiable} if $F$ is satisfiable precisely if $F'$ is
\emph{equisatisfiable} if $F$ is satisfiable precisely if $F'$ is
satisfiable.
satisfiable.


\subsection{First-order formulas and theories}
\subsection{First-order quantifier-free formulas and theories}
In the following text, we suppose the knowledge of a standard
first-order logic and model theory notions, which can be found for
example in ???.

Definitions for the first-order quantifier-free formulas are similar
to those from subsection \ref{prelim:prop}, except that the set $\P$
now contains a fixed finite set of first-order atoms, which do not
contain free variables. For example, if we consider a set of atoms
$\P = \{ x \geq y + 1, x = 4 \}$, where $x, y, 1$, and $4$ are
constants, $x \geq y + 1~\vee~\neg (x = 4)$ is a clause.

A \emph{theory} $T$ is a set of first-order structures. A formula $F$
is $T$-\emph{satisfiable} or $T$-\emph{consistent} if there is a
structure $M \in T$ in which the formula $F$ holds in the standard
first-order sense. The formula $F$ is $T$-\emph{unsatisfiable} or
$T$-\emph{inconsistent} otherwise. In the rest of this chapter we
consider only theories for which the satisfiability of the conjunction
of literals is decidable and we will call any decision procedure for
the conjunction of the literals a $T$-\emph{solver}.

Using the same notation as in the propositional logic, a partial
assignment is a set of literals and can be therefore seen as a
conjunction of literals, i.e. a formula. If a partial assignment $M$
is a propositional model of a for a given formula $F$ and the partial
assignment $M$ is $T$-consistent, we say that $M$ is a
$T$-\emph{model} of $F$. If $F$ and $G$ are formulas such that
$F \wedge \neg G$ is $T$-inconsistent, we say that \emph{$F$ entails
  $G$ in $T$} and write it as $F \models_T G$.


\section{Propositional satisfiability}
\section{Propositional satisfiability}


@@ -79,82 +115,23 @@ even for simple formulas. To alleviate this problem, the refinement of
Loveland~\cite{DPLL62}.
Loveland~\cite{DPLL62}.


Davis--Putnam--Logemann--Loveland algorithm (\dpll) iterativelly tries
Davis--Putnam--Logemann--Loveland algorithm (\dpll) iterativelly tries
to build a satisfying assignment by searching and it backtracks if any
to build a satisfying assignment and it backtracks if any of the input
of the input clauses becomes false in the current assignment. The
clauses becomes false in the current assignment. A key procedure
search of \dpll is guided by the unit propagation (also known as
guiding the \dpll search is \emph{unit clause rule}, which is based on
boolean constraint propagation), which is based on the observation
the observation that if a formula contains a clause $C$ that is unit
that given a clause $C \vee l$ in which all literals of $C$ are false
in the current assignment, the only way to build a satisfying
in the current assignment $M$ and the literal $l$ is undefined, the
asignment the sole undefined literal of $C$ to $M$. The iterated
only way to build a satisfying asignment is to add the literal $l$ to
application of the unit clause rule is called \emph{unit propagation}
$M$.
or \emph{boolean constraint propagation} (\bcp)~\cite{ZM88}. The \dpll

search consists of decision and propagation steps. In decision steps,
As observed by Nieuwenhuis et al.~\cite{NOT06}, the \dpll algorithm
a variable and its new value are chosen. After the decision step, the
can be presented as a transition system. In this system, the states
\bcp is performed to set values of variables which are implied by the
are $\fail$ and pairs $\state{M}{F}$, where $F$ is a \cnf formula and
decision. The backtracking used in \dpll is \emph{chronological},
$M$ is a \emph{sequence} of literals, each marked as \emph{decision}
i.e. after the conflict the value of the last decided literal is
or non-decision literal. Decision literals are denoted as $\dec{l}$
changed.
and intuitively correspond to literals whose value was set arbitrarily

during the search, and hence their value can be changed to $\neg l$
%It has been observed that \dpll based solver spends the
during backtracking if necessary. We will denote a concatenation of
%majority of its computation time by doing \bcp steps, a
sequences $M$ and $N$ by a simple juxtaposition $MN$ and we will treat
literals as sequence of length 1. A transition systems further
contains a \emph{transition relation} $\Longrightarrow$, which is a
binary relation over the set of states. Instead of writing
$(s, t) \in \, \Longrightarrow$, we will write simply
$s \Longrightarrow t$. The reflexive and transitive closure of the
relation $\Rightarrow$ will be denoted as $\Rightarrow^*$. The
transition relation for the \dpll transition system is given by the
following set of rules:

\begin{align*}
  \intertext{\textsf{UnitPropagate}}
  \trans{\state{M}{F, C \vee l}}{&\state{M l}{F, C \vee l}}
                             &&\text{ if }
                                \begin{cases}
                                  M \models \neg C \\ l \text{ is undefined in } $M$
                                \end{cases}
  \intertext{\textsf{PureLiteral}}
  \trans{\state{M}{F}}{&\state{M l}{F}}
                             &&\text{ if }
                                \begin{cases}
                                  l \text{ occurs in } F \\
                                  \neg l \text{ does not occur in } F \\
                                  l \text{ is undefined in } $M$
                                \end{cases}
%\end{align*}
%\begin{align*}
  \intertext{\textsf{Decide}}
  \trans{\state{M}{F}}{&\state{M \dec{l}}{F}}
                             &&\text{ if }
                                \begin{cases}
                                  l \text{ or } \neg l \text{ occurs in } F \\
                                  l \text{ is undefined in } $M$
                                \end{cases}
  \intertext{\textsf{Fail}}
  \trans{\state{M}{F, C}}{&\fail}
                             &&\text{ if }
                                \begin{cases}
                                  M \models \neg C \\
                                  M \text{ contains no decision literals}
                                \end{cases}
  \intertext{\textsf{Backtrack}}
  \trans{\state{M \dec{l} N}{F, C}}{&\state{M \neg l}{F, C}}
                             &&\text{ if }
                                \begin{cases}
                                  M \dec{l} N \models \neg C \\
                                  N \text{ contains no decision literals}
                                \end{cases}
\end{align*}

A state $s$ is called \emph{final} if there is no state $t$ such that
$\trans{s}{t}$. It can be shown that if $F$ is a formula and $S_f$ an
arbitrary final state such that
$\transstar{\state{\emptyset}{F}}{S_f}$, then $F$ is unsatisfiable
precisely if $S_f = \fail$. Moreover, if $S_f = \state{M}{F}$, then
$M$ is a model of the formula $F$~\cite{NOT06}.

Note that the used backtracking strategy is \emph{chronological},
i.e. the value of the last decision made is changed.


\subsection{CDCL}
\subsection{CDCL}


@@ -166,10 +143,68 @@ refinement of \dpll algorithm called Conflict-Driven Clause Learning
and is a base of almost all modern \sat solvers~\cite{KSM11}. In
and is a base of almost all modern \sat solvers~\cite{KSM11}. In
addition to \dpll, \cdcl based \sat solvers have mechanisms to analyze
addition to \dpll, \cdcl based \sat solvers have mechanisms to analyze
a conflict and learn a new clause from a conflicting
a conflict and learn a new clause from a conflicting
assignment. Moreover, in contrast to a \emph{chronological
assignment. Moreover, in contrast to a chronological backtracking,
  backtracking}, in which the value of the most recent decision is
\cdcl solvers can perform \emph{non-chronological backtracking} to an
changed, \cdcl solvers can perform \emph{non-chronological
earlier decision literal.
  backtracking} to an earlier decision literal.

In particular, \cdcl based \sat solvers maintain \emph{implication
  graph} during the search, which records reasons for assigning the
unit-propagated literals during the search. If the conflict clause is
found, a clause which captures the reason of the conflict is computed
by analyzing of the implication graph or equivalently by the backwards
resolution process~\cite{BKS04}. After computing the reason of the
conflict, \cdcl based solvers \emph{learn} the corresponding clause,
in order not to repeat the similar conflict in the future, and change
the value of the last decision literal which makes the clause
true. Note that this literal does not have to be the last decision
literal in the search, and therefore a bigger region of the search
space can be ruled out from the search, as it is known not to contain
a solution. There are multiple strategies to computing the clausal
reason for the conflict, but the majority of \cdcl based \sat solvers
are using the first \emph{unique implication point} based learning
scheme, which has been shown to produce small clauses in the
practice~\cite{MMZZM01}.

Beside the learning, the efficiency of the most \cdcl based \sat
solvers relies on \emph{branching heuristics}, which select the next
decision variable during the search. Multiple branching heuristics
have been proposed~\cite{BF15branching} and the majority of modern
\sat solver are using VSIDS heuristic~\cite{MMZZM01}, which prefers
the variables which have appeared in the biggest number of recent
conflicts. More recently, new branching heuristics based on learning
rate have been proposed and shown to outperform currently used VSIDS
branching heuristics~\cite{LGPC16}.

Another important part of modern \sat solvers are dynamic restarts,
which restart the search in hope that clauses learned during the
conflict analyses will guide the search from regions of the search
space which contain no solutions~\cite{GSK98}. Although most commonly
used heuristic to decide when to restart the solver is based on the
Luby sequence~\cite{LSZ93}, the recent survey has shown that it is
outperformed by a heuristic based on the concept of exponential moving
averages~\cite{BF15restarts}.\marginpar{spravit citaci, BF15a nevyšlo
  v proceedings}

In addition to the mentioned heuristics, an efficient implementation
of \cdcl based \sat solver relies on lazy data structures used in
boolean constraint propagation. This is important because it has been
observed \sat solvers spend most of the computation time doing
\bcp. Therefore, most of the modern \sat solvers use \emph{two-watched
  literal scheme}~\cite{MMZZM01} to efficiently identify clauses which
has become unit during the search.

The contribution of all mentioned parts of the \cdcl based \sat
solvers to the overall performance of the MiniSAT
solver~\cite{Minisat} has been experimentally evaluated by Katebi,
Sakallah, and Marques-Silva~\cite{KSM11}. The experimental evaluation
shows that the most important from the mentioned techniques is
clause-learning, followed by the VSIDS branching heuristic.

\subsection{Other approaches to \sat}
Aside from conflict-driven \sat algorithms, other variants of
approaches to \sat have been proposed. Most notable of those variants
are symbolic \sat solvers and look-ahead based \sat
solvers~\cite{HM09}.


\section{Satisfiability modulo theories}
\section{Satisfiability modulo theories}


+1 −0
Original line number Original line Diff line number Diff line
@@ -5,6 +5,7 @@
\newcommand{\dppr}{\textsc{dp}\xspace}
\newcommand{\dppr}{\textsc{dp}\xspace}
\newcommand{\dpll}{\textsc{dpll}\xspace}
\newcommand{\dpll}{\textsc{dpll}\xspace}
\newcommand{\cdcl}{\textsc{cdcl}\xspace}
\newcommand{\cdcl}{\textsc{cdcl}\xspace}
\newcommand{\bcp}{\textsc{bcp}\xspace}


\newcommand{\state}[2]{\ensuremath{#1 \; || \; #2}\xspace}
\newcommand{\state}[2]{\ensuremath{#1 \; || \; #2}\xspace}
\newcommand{\dec}[1]{\ensuremath{#1^\bullet}}
\newcommand{\dec}[1]{\ensuremath{#1^\bullet}}
+3 −0
Original line number Original line Diff line number Diff line
@@ -63,6 +63,9 @@
\newcommand{\Ie}{I.\,e.}
\newcommand{\Ie}{I.\,e.}
\newcommand{\eg}{e.\,g.}
\newcommand{\eg}{e.\,g.}
\newcommand{\Eg}{E.\,g.}
\newcommand{\Eg}{E.\,g.}

\hyphenation{sche-mes}

\input{Includes/notation.tex}
\input{Includes/notation.tex}
% ****************************************************************************************************
% ****************************************************************************************************


+1 −1
Original line number Original line Diff line number Diff line
@@ -704,7 +704,7 @@
        \PassOptionsToPackage{draft}{prelim2e}
        \PassOptionsToPackage{draft}{prelim2e}
        \RequirePackage{prelim2e}
        \RequirePackage{prelim2e}
        \renewcommand{\PrelimWords}{\relax}
        \renewcommand{\PrelimWords}{\relax}
        \renewcommand{\PrelimText}{\footnotesize[\,\today\ at \thistime\ -- \texttt{classicthesis}~\myVersion\,]}
        \renewcommand{\PrelimText}{\footnotesize[\,DRAFT -- \today\ at \thistime\,]}
}{\renewcommand{\finalVersionString}{\emph{Final Version} as of \today\ (\texttt{classicthesis}~\myVersion).}}
}{\renewcommand{\finalVersionString}{\emph{Final Version} as of \today\ (\texttt{classicthesis}~\myVersion).}}


% ********************************************************************
% ********************************************************************
Loading