<?xml version="1.0" encoding="US-ASCII"?>
<!DOCTYPE rfc SYSTEM "rfc2629.dtd" [
	<!ENTITY % rfc2629 PUBLIC '' 
'http://xml.resource.org/public/rfc/bibxml/reference.RFC.2629.xml'>
	<!ENTITY % rfc3654 PUBLIC ''
"http://xml.resource.org/public/rfc/bibxml/reference.RFC.3654.xml">
	<!ENTITY % rfc3746 PUBLIC ''
'http://xml.resource.org/public/rfc/bibxml/reference.RFC.3746.xml'>
	<!ENTITY % rfc3758 PUBLIC ''
'http://xml.resource.org/public/rfc/bibxml/reference.RFC.3758.xml'>
	<!ENTITY % rfc3768 PUBLIC ''
'http://xml.resource.org/public/rfc/bibxml/reference.RFC.3768.xml'>
	<!ENTITY % rfc4960 PUBLIC '' 
'http://xml.resource.org/public/rfc/bibxml/reference.RFC.4960.xml'>
	<!ENTITY % rfc3554 PUBLIC '' 
'http://xml.resource.org/public/rfc/bibxml/reference.RFC.3554.xml'>
	<!ENTITY % rfc2409 PUBLIC '' 
'http://xml.resource.org/public/rfc/bibxml/reference.RFC.2409.xml'>
	<!ENTITY % rfc4301 PUBLIC '' 
'http://xml.resource.org/public/rfc/bibxml/reference.RFC.4301.xml'>
	<!ENTITY % rfc4303 PUBLIC '' 
'http://xml.resource.org/public/rfc/bibxml/reference.RFC.4303.xml'>
	<!ENTITY % rfc2404 PUBLIC '' 
'http://xml.resource.org/public/rfc/bibxml/reference.RFC.2404.xml'>
	<!ENTITY % rfc3602 PUBLIC '' 
'http://xml.resource.org/public/rfc/bibxml/reference.RFC.3602.xml'>
	<!ENTITY % rfc5226 PUBLIC '' 
'http://xml.resource.org/public/rfc/bibxml/reference.RFC.5226.xml'>
	<!ENTITY % sctpapi PUBLIC '' 
'http://xml.resource.org/public/rfc/bibxml3/reference.I-D.ietf-tsvwg-sctpsocket.xml'>
	<!ENTITY % rfc5810 PUBLIC '' 
'http://xml.resource.org/public/rfc/bibxml/reference.RFC.5810.xml'>
	<!ENTITY % rfc5812 PUBLIC '' 
'http://xml.resource.org/public/rfc/bibxml/reference.RFC.5812.xml'>
]>
<?rfc toc="yes"?>
<?rfc tocompact="yes"?>
<?rfc tocdepth="3"?>
<?rfc tocindent="yes"?>
<?rfc symrefs="yes"?>
<?rfc sortrefs="yes"?>
<?rfc comments="yes"?>
<?rfc inline="yes"?>
<?rfc compact="yes"?>
<?rfc subcompact="no"?>
<?rfc strict="no"?>
<rfc category="std" ipr="trust200902" docName="draft-ietf-forces-ceha-03">
	<front>
		<title abbrev="ForCES Intra-NE High Availability">
		  ForCES Intra-NE High Availability
	  </title>
		<author fullname="Kentaro Ogawa" initials="K." surname="Ogawa">
			<organization>NTT Corporation</organization>
			<address>
				<postal>
					<street>3-9-11 Midori-cho</street>
					<city>Musashino-shi, Tokyo</city>
					<code>180-8585</code>
					<country>Japan</country>
				</postal>
				<email>ogawa.kentaro@lab.ntt.co.jp</email>
			</address>
		</author>
		<author fullname="Weiming Wang" initials="W. M." surname="Wang">
			<organization>Zhejiang Gongshang University</organization>
			<address>
				<postal>
					<street>149 Jiaogong Road</street>
					<city>Hangzhou</city>
					<code>310035</code>
					<country>P.R.China</country>
				</postal>
				<phone>+86-571-88057712</phone>
				<email>wmwang@mail.zjgsu.edu.cn</email>
			</address>
		</author>
		<author fullname="Evangelos Haleplidis" initials="E." surname="Haleplidis">
			<organization>University of Patras</organization>
			<address>
				<postal>
					<city>Patras</city>
					<country>Greece</country>
				</postal>
				<email>ehalep@ece.upatras.gr</email>
			</address>
		</author>
		<author fullname="Jamal Hadi Salim" initials="J." surname="Hadi Salim">
			<organization>Mojatatu Networks</organization>
			<address>
				<postal>
					<city>Ottawa, Ontario</city>
					<country>Canada</country>
				</postal>
				<email>hadi@mojatatu.com</email>
			</address>
		</author>
		<date year="2012"/>
		<area>Routing</area>
		<keyword>RFC</keyword>
		<keyword>Request for Comments</keyword>
		<keyword>I-D</keyword>
		<keyword>Internet-Draft</keyword>
		<keyword>ForCES</keyword>
		<keyword>HA</keyword>
		<keyword/>
		<abstract>
			<t>
This document discusses CE High Availability within a ForCES
NE.
    </t>
		</abstract>
	</front>
	<middle>
		<section title="Definitions">
			<t>
The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
"SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in
this document are to be interpreted as described in RFC 2119.

</t>
			<t>
The following definitions are taken from <xref target="RFC3654"/> and
<xref target="RFC3746"/>:
</t>
				<t>
Logical Functional Block (LFB) -- 
A template that represents a fine-grained, logically separate
aspect of FE processing.
</t>
				<t>
ForCES Protocol -- 
The protocol used at the Fp reference point in the ForCES 
Framework in <xref target="RFC3746"/>.
</t>
				<t>
ForCES Protocol Layer (ForCES PL) -- 
A layer in the ForCES architecture that embodies the ForCES protocol
and the state transfer mechanisms as defined in
<xref target="RFC5810"/>.
</t>
				<t>
ForCES Protocol Transport Mapping Layer (ForCES TML) -- A layer in 
ForCES protocol architecture that specifically addresses the 
protocol message transportation issues, such as how the protocol 
messages are mapped to different transport media (like SCTP, IP, 
TCP, UDP, ATM, Ethernet, etc.), and how to achieve and implement 
reliability, security, etc.
</t>
		</section>
		<section title="Introduction">
			<figure anchor="ne_arch" title="ForCES Architecture">
				<preamble>
					<xref target="ne_arch"/> illustrates a ForCES
NE controlled by a set of redundant CEs with CE1 being active
and CE2 and CEn-1 being backups.
</preamble>
				<artwork><![CDATA[
                        -----------------------------------------
                        | ForCES Network Element                |
                        |                        +-----------+  |
                        |                        |  CEn-1    |  |
                        |                        |  (Backup) |  |
  --------------   Fc   | +------------+      +------------+ |  |
  | CE Manager |--------+-|     CE1    |------|    CE2     |-+  |
  --------------        | |  (Active)  |  Fr  |  (Backup)  |    |
        |               | +-------+--+-+      +---+---+----+    |
        | Fl            |         |  |    Fp      /   |         |
        |               |         |  +---------+ /    |         |
        |               |       Fp|            |/     |Fp       |
        |               |         |            |      |         |
        |               |         |      Fp   /+--+   |         |
        |               |         |  +-------+    |   |         |
        |               |         |  |            |   |         |
  --------------    Ff  | --------+--+--      ----+---+----+    |
  | FE Manager |--------+-|     FE1    |  Fi  |     FE2    |    |
  --------------        | |            |------|            |    |
                        | --------------      --------------    |
                        |   |  |  |  |          |  |  |  |      |
                        ----+--+--+--+----------+--+--+--+-------
                            |  |  |  |          |  |  |  |
                            |  |  |  |          |  |  |  |
                              Fi/f                   Fi/f

       Fp: CE-FE interface
       Fi: FE-FE interface
       Fr: CE-CE interface
       Fc: Interface between the CE Manager and a CE
       Ff: Interface between the FE Manager and an FE
       Fl: Interface between the CE Manager and the FE Manager
       Fi/f: FE external interface
]]></artwork>
			</figure>
<t>
The ForCES architecture allows FEs to be aware of multiple CEs
but enforces that only one CE be the master controller. This is known
in the industry as 1+N redundancy. The master CE controls the 
FEs via the ForCES protocol operating in the Fp interface. If the master CE
becomes faulty, a backup CE takes over and NE operation continues.
By definition, the current documented setup is known as cold-standby.
The CE set is static and is passed to the FE by the FE Manager (FEM)
via the Ff interface and to each CE by the CE Manager (CEM) in the Fc 
interface during the pre-association phase.

</t>
			<t>
From an FE perspective, the knobs of control for a CE set are defined 
by the FEPO LFB in <xref target="RFC5810"/>, Appendix B. 
<xref target="HAnow"/> of this document details these knobs further.
</t>
<section title="Document Scope" anchor="scope">
<t>
It is assumed that the reader is aware of the ForCES architecture to
make sense of the changes made here. This document provides minimal
background to set the context of the discussion in <xref target="hsb"/>.
</t>
<t>
By current definition, the Fr interface is out of scope for the
ForCES architecture.  However, it is expected that organizations 
implementing a set of CEs will need to have the CEs communicate
to each other via the Fr interface in order to achieve the 
synchronization necessary for controlling the FEs.
</t>
				<t>
The problem scope addressed by this document
falls into 2 areas:
<list style="numbers">
						<t>
To describe with more clarity (than <xref target="RFC5810"/>) how
the current cold-standby approach operates within the NE cluster. 
</t>
						<t>
To describe how to evolve the cold-standby setup to a hot-standby
redundancy setup so as to improve the failover time and NE availability.
</t>
						<!-- removed from WG doc version 0
<t>
To describe a minimalist approach for Fr plane 
communication which interacting CEs MAY use for both cold and hot
standby.
</t>
-->
					</list>
				</t>
</section>
<section title="Quantifying Problem Scope" anchor="quant">
<t>
The NE recovery and availability is dependent on several time-sensitive
metrics:
<list style="numbers">
						<t>
       How fast the CE plane failure is detected by the FE.
       </t>
						<t>
	How fast a backup CE becomes operational.
       </t>
						<t>
	 How fast the FEs associate with the new master CE.
       </t>
						<t>
	 How fast the FEs recover their state and become operational.
       </t>
					</list>
				</t>
				<t>
The design choices made in <xref target="RFC5810"/>
to meet the above goals are driven by a desire for simplicity.
</t>
<t>
To quantify the above criteria with the current prescribed 
ForCES CE setup in <xref target="RFC5810"/>: 
<list style="numbers">
<t>
How fast the CE side detects a CE failure is left undefined. To
illustrate an extreme scenario, we could have a human operator
acting as the monitoring entity to detect faulty CEs. How fast such
detection happens could be in the range of seconds to days. A more
active monitor on the Fr interface could improve this detection. 
<!--
In <xref target="CEsync"/> we define a behavior on Fr 
interface to detect CE failures in order to improve things.
-->
						</t>
						<t>
How fast the backup CE becomes operational is also currently out of scope.
In the current setup, a backup CE need not be operational at all
(for example, to save power) and therefore it is feasible for a 
monitoring entity to boot up a backup CE after it detects the 
failure of the master CE. In <xref target="hsb"/> of this document,
we suggest that at least one backup CE be online so as to improve this
metric.
</t>
						<t>
How fast an FE associates with the new master CE is also currently undefined.
The cost of an FE connecting and associating adds to the recovery overhead. 
As mentioned above we suggest having at least one backup CE online.
In <xref target="hsb"/> we propose to zero out the connection and 
association cost on failover by having each FE associate with all 
online backup CEs after associating to the active CE.
Note that if an FE pre-associates with backup CEs, then the system will be 
technically operating in hot-standby mode.
</t>
						<t>
And last: How fast an FE recovers its state depends on how much NE state
exists. By the current ForCES definition, the new master CE assumes zero state
on the FE and starts from scratch to update the FE. 
So the larger the state, the longer the recovery.
<!--
In <xref target="CEsync"/> we propose to improve this metric 
by having the master CE and backup CEs synchronizing in the Fr plane.
-->
						</t>
					</list>
				</t>
			</section>
		</section>
		<section title="RFC5810 CE HA Framework" anchor="curr">
			<t>
To achieve CE High Availability, FEs and CEs MUST inter-operate per 
<xref target="RFC5810"/> definition
which is repeated for contextual reasons in <xref target="HAnow"/>. 
It should be noted that in this default setup, which MUST be 
implemented by CEs and FEs needing HA, the Fr plane is out of 
scope (and if available is proprietary to an implementation).
</t>
	<section anchor="HAnow" title="Current CE High Availability Support">
				<t>
     As mentioned earlier, although there can be multiple redundant CEs,
     only one CE actively  controls FEs in a ForCES NE. In practice 
     there may be only one backup CE.
     At any moment in time only one master CE can control the FEs. 
     In addition, the
     FE connects and associates to only the master CE. The FE and the CE PL
     are aware of the primary and one or more secondary CEs. 
     This information (primary,
     secondary CEs) is configured on the FE and the CE PLs during
     pre-association by the FEM and the CEM respectively.
     </t>
				<t>
					<xref target="seq_HA_Report_Primary"/> below 
	      illustrates the ForCES message sequences that the FE
	      uses to recover the connection in the currently defined
             cold-standby scheme.
      </t>
	<figure anchor="seq_HA_Report_Primary" title="CE Failover for Cold Standby">
					<artwork><![CDATA[


      FE                   CE Primary        CE Secondary
      |                       |                    |
      |  Asso Estb,Caps exchg |                    |
    1 |<--------------------->|                    |
      |                       |                    |
      |       state update    |                    |
    2 |<--------------------->|                    |
      |                       |                    |
      |                       |                    |
      |                   FAILURE                  |
      |                                            |
      |         Asso Estb,Caps exchange            |
    3 |<------------------------------------------>|
      |                                            |
      |              Event Report (pri CE down)    |
    4 |------------------------------------------->|
      |                                            |
      |         state update from scratch          |
    5 |<------------------------------------------>|

 ]]></artwork>
				</figure>
				<section anchor="FEPOHA" title="Cold Standby Interaction with ForCES Protocol">
<t>
High Availability parameterization in an FE is 
driven by configuring the FE Protocol Object (FEPO) LFB. 
</t>
<t>
    The FEPO CEID component identifies the current master CE and the
    component table BackupCEs identifies the backup CEs.
    The FEPO FE Heartbeat Interval, CE Heartbeat Dead Interval,
    and CE Heartbeat policy help in detecting connectivity
    problems between an FE and CE. The CE Failover policy defines
    how the FE should react on a detected failure.
</t>
<t>
<xref target="FESM"/> illustrates the defined state machine that facilitates connection
recovery.
</t>
<t>
The FE connects to the CE specified in the FEPO CEID component.
If it fails to connect to the defined CE, it moves it
to the bottom of table BackupCEs and sets its CEID
component to be the first CE retrieved from table BackupCEs.
The FE then attempts to associate with the CE designated as the new
primary CE. The FE continues through this procedure until it 
successfully connects to one of the CEs.
</t>
<figure anchor="FESM" title="FE State Machine considering HA">
					<artwork><![CDATA[
                             FE tries to associate    
                                  +-->-----+
                                  |        |
    (CE issues Teardown ||    +---+--------v----+     
       Lost association) &&   | Pre-Association |
      CE failover policy = 0  | (Association    |     
          +------------>-->-->|   in            +<----+
          |                   | progress)       |     |
          |     CE Issues     +--------+--------+     |
          |     Association        |                  | CEFTI
          |       Response         V                  | timer 
          |     ___________________+                  | expires
          |     |                                     ^             
          |     V                                     |    
        +-+-----------+                        +------+-----+     
        |             |                        |  Not       |     
        |             | (CE issues Teardown || | Associated |     
        |             |  Lost association) &&  |            +->---+     
        | Associated  | CE Failover Policy = 1 |(May        | FE  |
        |             |                        | Continue   |try  v 
        |             |-------->------->------>| Forwarding)|assn | 
        |             |                        |            |-<---+ 
        |             |                        |            |
        +-------------+                        +-------+-----+     
             ^                                         |
             |            CE Issues                    v
             |            Association                  |
             |            Setup                        |
             +_________________________________________+
           
]]></artwork>
</figure>
<t>
When communication fails between the FE and CE (which can be caused
      by either the CE or link failure but not FE related), either 
      the TML on the
      FE will trigger the FE PL regarding this failure or it will be detected
      using the HB messages between FEs and CEs. The communication failure,
      regardless of how it is detected, MUST be considered as a loss of
      association between the CE and corresponding FE.
</t>
<t>If the FE's FEPO CE Failover Policy is configured to mode 0 
(the default), it will immediately transition to the pre-association
phase. This means that if association is again established, 
all FE state will need to be re-established.
</t>
					<t>
      If the FE's FEPO CE Failover Policy is configured to mode 1, 
      it indicates that the FE is capable of HA restart recovery.
      In such a case, the FE transitions to the Not Associated state and
      the CEFTI timer <xref target="RFC5810"/> is started. The FE MAY continue to forward
      packets during this state. It MAY also recycle through any configured
      backup CEs in a round-robin fashion. It first adds its
      primary CE to the bottom of table BackupCEs and sets its CEID 
      component to be the first secondary retrieved from table BackupCEs. 
      The FE then attempts to associate with the CE designated as the new 
      primary CE. 
      If it fails to re-associate with any CE and the
      CEFTI expires, the FE then transitions to the pre-association state.
      </t>
					<t>
      If the FE, while in the not associated state, manages to reconnect
      to a new primary CE before CEFTI expires it transitions to the
      Associated state. Once re-associated, the CE tries to synchronize any
      state that the FE may have lost during the not associated state.  
      How the CE re-synchronizes such state is out of scope for
      the current ForCES architecture but would include issuing new
      configs and queries.
      </t>
					<t>
      An explicit message (a Config message setting Primary CE component in
      ForCES Protocol object) from the primary CE, can also be used to change
      the Primary CE for an FE during normal protocol operation. In this case,
      the FE transitions to the Not Associated State and attempts to 
      Associate with the new CE.
      </t>
					<!--
<t>XXX: I think we should remove the paragraph below. It seems to
me when i read it to add more confusion than explain any thing useful ..
</t>
      <t>
      Also note that the FEs in a ForCES NE could also use a multicast
      CE ID, i.e., they could be associated with a group of CEs (this 
      assumes the use of a CE-CE synchronization protocol, which is 
      out of scope for this specification). In this case, the loss of 
      association would mean that
      communication with the entire multicast group of CEs has been lost. The
      mechanisms described above will apply for this case as well during the
      loss of association. If, however, the secondary CE was also using the
      multicast CE ID that was lost, then the FE will need to form a new
      association using a different CE ID. If the capability exists, the FE MAY
      first attempt to form a new association with original primary CE using a
      different non multicast CE ID.
      </t>
-->
</section>
<section title="Responsibilities for HA">
<!--
<t>XXX: we may remove this section (not much value to overall
discussion)
</t>
-->
<t>TML Level:
<list style="numbers">
<t>The TML controls logical connection availability and
          failover.</t>
<t>The TML also controls peer HA management.</t>
</list>
</t>
<t>At this level, control of all lower layers, for example transport
level (such as IP addresses, MAC addresses etc) and associated links
going down are the role of the TML.</t>
<t>PL Level: <vspace/> All other functionality, including
configuring the HA behavior during setup, the CE IDs used to
identify primary and secondary CEs, protocol messages used to report CE
failure (Event Report), Heartbeat messages used to detect association
failure, messages to change the primary CE (Config), and other HA
related operations described in <xref target="HAnow"/>, are the PL's responsibility.</t>
<t>To put the two together, if a path to a primary CE is down, the TML
would take care of failing over to a backup path, if one is available.
If the CE is totally unreachable then the PL would be informed and it
would take the appropriate actions described before.</t>
</section>
</section>
</section>
<section title="CE HA Hot Standby" anchor="hsb">
<t>
In this section we describe small extensions to the existing
scheme to enable hot standby HA. To achieve hot standby HA,
we target specific goals defined in <xref target="quant"/>, namely:
 <list style="symbols">
  <t>
   How fast a backup CE becomes operational.
  </t>
  <t>
   How fast the FEs associate with the new master CE.
  </t>
 </list>
</t>
<t>
As described in <xref target="HAnow"/>, in the pre-association phase
the FEM configures the FE to make it aware of all the CEs in the NE. The FEM MUST 
configure the FE to make it aware of which CE is the master and MAY specify any 
backup CE(s). 
</t>

<section title="Changes to the FEPO model" anchor="fepo-changes">
<t>In order for the above to be achievable there is a need to make 
a few changes in the FEPO model. <xref target="Appendix"/> contains the 
xml definition of the new version 2 of the FEPO LFB.
</t>
<t>Changes from the version 1 of FEPO are:</t>
<t>
<list style="numbers">
<t>Addition of a new datatype, status (unsigned char) with special 
values 0 (Disconnected), 1 (Connected), 2 (Associated), 
3 (Lost_Connection) and 4 (Unreachable).</t>
<t>Change Component BackupCEs (9) to AllCEs and instead of an 
Array of unsigned integers(CEID), it MUST be an Array of unsigned 
integers (CEID) and unsigned char (status) for each CE.</t>
<t>Add two special values to the CEFailoverPolicyValues. 
2 (High availability without Graceful restart) and 
3 (High availability with Graceful restart).</t>
<t>Added one additional Event, the HAPrimaryCEDown event which reports last known CEID and tentative new master CEID.</t>
</list>
</t>
<t>As the FEPO component 9 is not backwards compatible with the previous version there is the issue of interoperability between CE and FE. However this is a pre-association version mismatch and the managers have to identify the issue and not allow an association that would fail or cause problems.</t>
</section>

<section title="FEPO processing " anchor="fepo-processing">
<t>
The FE's FEPO LFB version 2 AllCEs table (previously BackupCEs) contains 
all the CEIDs that the FE may connect and associate with. The ordering 
of the CE IDs in this table defines the priority order in which an FE 
will connect to the CEs.
In the pre-association phase, the first CE ID (lowest table index) 
in the AllCEs table MUST be the first CE ID that the FE will attempt 
to connect and associate with. 
If the FE fails to connect and associate with the first CE ID, it will 
attempt to connect to the second CE ID and so forth, and cycles back to the 
beginning of the list until there is a connection and an association.
The FE MUST associate with at least one CE.
Upon a successful association, the FEPO's CEID component identifies the 
current associated master CE.
</t>
<t>
For the sake of simplicity, the FE MUST respond to messages issued only
by the master CE. This simplifies the synchronization and avoids the 
concept of locking FE state, i.e., the FE MUST drop any messages from backup CEs. 
However, asynchronous events that the master CE has subscribed to, as well as 
heartbeats are sent to all associated-to CEs.
Packet redirects continue to be sent only to the master CE. The Heartbeat 
Interval, the CEHB Policy and the FEHB Policy MUST be the same
for all CEs.
</t>

<t>
<xref target="FESM-HA"/> illustrates the state machine that facilitates connection
recovery with High Availability enabled.
</t>

<figure anchor="FESM-HA" title="FE State Machine considering HA">
					<artwork><![CDATA[

                          FE tries to associate    
                               +-->-----+
                               |        |
                               ^        v
(CE issues Teardown ||    +----+--------+---+     
   Lost association) &&   | Pre-Association |
  CE failover policy = 0  | (Association    +<-------------------+
      +------------>-->-->|   in            +<-----+             |
      |                   | progress)       |      |             |
      |     CE Issues     +--------+--------+      |             |
      |     Association        |                   |             |
      |       Response         V            Not Found || CEFTI   |
      |     ___________________+              timer expires      |
      |     |                                      |             |
      |     V                                      ^             |
    +-+-----------+                         +------+------+      |
    |             |                         |  Not        |      |
    |             |  (CE issues Teardown || |  Associated |      |
    |             |    Lost association) && |             |    CEFTI
    | Associated  | (CE Failover Policy=2|| | (May        |    timer
    |             |  CE Failover Policy=3)  | Continue    |   expires
    |             +---------->------->----->|  Forwarding)|      |
    |             |                         |             |      |
    |             |                         | Search for  |      |
    |             |              +--------->| next        |      |
    |             |              |          | associated  |      |     
    |             |              |          | CE          |      |
    +-------------+              |          +-------------+      |     
         ^                       |                V              |
         |                       |                |              |
         |                       |             Found CE          |
         |                  CEHDI Expires   Send Event of        | 
         |                       |            New CE ID.         |
         |                       |                |              |
         |                       |                V              |
         |                       |         +------+------+       |
         |                       ^---------+ Confirm     +-------^
         |                                 | State       | 
         |              Received     +---->|             | 
         |              different    |     | Wait for CE | 				
         |              CE ID.       ^     | to confirm  | 
         |              Resend Event |     | new CE ID   | 
         |                           +----<|             | 
         |                                 +-----+-------+
         |           Received same CE ID         |				
         +_______________________________________+
       
]]></artwork>
</figure>

<t>
Once the FE has associated with a master CE it moves to the 
post-association phase (Associated state). 
In this state, the master CE MAY update the list of backup CEs.
It MAY also instruct the FE to use a different master CE.
It is assumed that the master CE will communicate with other CEs 
within the NE for the purpose of synchronization via the CE-CE interface. 
The CE-CE interface is out of scope for this document.
</t>
<figure anchor="seq_HA_Report_Primary_Hot_Standby" title="CE Failover for Hot Standby">
<artwork><![CDATA[

      FE                   CE#1         CE#2 ... CE#N
      |                      |            |        |
      | Asso Estb,Caps exchg |            |        |
    1 |<-------------------->|            |        |
      |                      |            |        |
      |      state update    |            |        |
    2 |<-------------------->|            |        |
      |                      |            |        |
      |        Asso Estb,Caps exchg       |        |
    3I|<--------------------------------->|        |
     ...                    ...          ...      ...
      |               Asso Estb,Caps exchg         |
    3N|<------------------------------------------>|
      |                      |            |        |
    4 |<-------------------->|            |        |
      .                      .            .        .
    4x|<-------------------->|            |        |
      |                   FAILURE         |        |
      |                      |            |        |
      | Event Report (CE#2 is new master) |        |
    5 |---------------------------------->|------->|
      |                                   |        |
      | Config (Set CEID to CEID of CE#2) |        |
    6 |<----------------------------------|        |
    7 |<--------------------------------->|        |
      .                      .            .        .
    7x|<--------------------------------->|        |
      .                      .            .        .
 ]]></artwork>
</figure>

<t>
While in the post-association phase, if the CE Failover Policy is set 
to 2 (High Availability without Graceful Restart) or 3 (High Availability 
with Graceful Restart) then the FE, after successfully associating with 
the master CE, MUST attempt to connect and associate with all the CEs 
that it is aware of.  In <xref target="seq_HA_Report_Primary_Hot_Standby"/>,
steps #1 and #2 illustrate the FE associating with CE#1 as the master, and 
steps #3I to #3N illustrate its association with backup CEs CE#2 to CE#N.
If the FE fails to connect or associate with some CEs, the FE MAY 
flag them as unreachable to avoid continuous attempts to connect.
The FE MAY retry to reassociate with unreachable CEs when possible.
</t>
<t>
When the master CE for any reason is considered to be down, then the FE 
will try to find the first associated CE from the list of all CEs in a 
round-robin fashion.
</t>
<t>If the FE is unable to find an associated CE in its list of CEs, 
then it will attempt to connect and associate with the first CE
from the list of all CEs and continue in a round-robin fashion 
until it connects and associates with a CE.
</t>
<t>Once the FE selects the associated CE to use as the new master,
the FE then sends a High Availability Primary CE Changed Event Notification to all 
associated CEs, notifying them that the primary CE is down as well as which CE 
the reporting FE considers to be the new master.
</t>
<t>The new master CE MUST configure the CEID component of the FE 
within the time limit defined by the CE Failover Timeout Interval 
(CEFTI) as a confirmation that the FE made the right choice.</t>

<figure anchor="seq_HA_Report_Primary_Hot_Standby2" title="CE Failover for Hot Standby">
<artwork><![CDATA[


      FE                   CE#1         CE#2 ... CE#N
      |                      |            |        |
      | Asso Estb,Caps exchg |            |        |
    1 |<-------------------->|            |        |
      |                      |            |        |
      |      state update    |            |        |
    2 |<-------------------->|            |        |
      |                      |            |        |
      |        Asso Estb,Caps exchg       |        |
    3I|<--------------------------------->|        |
      |                      |            |        |
     ...                    ...          ...      ...
      |               Asso Estb,Caps exchg         |
    3N|<------------------------------------------>|
      |                      |            |        |
    4 |<-------------------->|            |        |
      .                      .            .        .
    4x|<-------------------->|            |        |
      |                   FAILURE         |        |
      |                      |            |        |
      | Event Report (CE#2 is new master) |        |
    5 |---------------------------------->|------->|
      |                      |            |        |
      |              CEFTI timer expires  |        |
      |                      |            |        |
      | Event Report (CE#N is new master) |        |
    6 |---------------------------------->|------->|
      |                      |            |        |
      |     Config (Set CEID to CEID of CE#N)      |
    7 |<-------------------------------------------|
    8a|<------------------------------------------>|
      .                      .            .        .
    8x|<------------------------------------------>|

]]></artwork>
</figure>

<t>
If the FE does not get confirmation within the CE Failover Timeout
Interval (CEFTI), it picks the next CE on its list and advertises it
as the new master. <xref target="seq_HA_Report_Primary_Hot_Standby2"/>
illustrates the FE selecting CE#2 as its new master in step #5. The
timeout then expires without confirmation, so in step #6 the FE picks
CE#N as its new master. The FE receives confirmation that CE#N is the
new master in step #7. 
</t>
<t>
If the CE the FE assumed to be the master discovers that it should not be the new master 
CE, then it will configure the CEID with the ID of the proper master CE. How the 
CE decides who the new master CE is, is also out of scope of this document and is 
assumed to be done via a CE-CE communication protocol. The FE must then associate
with the new CE.
</t>

<t>If the CEFTI timer expires at either the not-associated or confirm states
without a new master CE confirmed, then the FE MUST revert to the 
pre-association stage.</t>

<t>
In most High Availability architectures there exists the possibility of 
split-brain. However, since in our setup the FE will never accept any 
configuration messages from any CE other than the master CE, we 
consider the FE as fenced against data corruption from the other CEs 
that consider themselves as the master. The split-brain issue becomes mostly a 
CE-CE communication problem which is considered to be out of scope.
</t>
<!-- 
<t>
The FE's FEPO LFB
CEID component identifies the current master CE and table 
BackupCEs identifies the backup CEs.
The FE only connects to the master CE and then proceeds to associate
with it. The master thereafter controls the FE and receives events
from it. This continues until there is communication failure between
the FE and CE at which point the FE attempts to connect to a CE
from the BackupCEs table until it succeeds to connect and associate
with one listed CE.
</t>
<t>
It is recommended that at least one backup CE should be online. Doing
so will improve how fast the backup CE will take to be operational
(as opposed to bringing up a backup CE when we detect a master CE fault).
If we assume that a CE implementation does state synchronization 
between CEs (proprietary or as discussed in <xref target="CEsync"/>), then
we can zero out
the cost of making the backup CE operational and ready to serve FEs; in
such a case an associating FE could immediately become operational.
</t>
<t>
If we assume the presence of at least one backup CE online, we can
improve how fast the FEs associate with a new master CE by making
two changes:
</t>
<t>
The first change that needs to be made is to have the FE, soon
after successfully connecting and associating with the master CE,
to proceed and connect as well as associate with the rest of the CEs 
listed in the BackupCEs table. 
</t>
-->
			<t>
By virtue of having multiple CE connections, the FE switchover
to a new master CE will be much faster. The overall effect
is an improvement in the NE recovery time in case of communication
failure or faults of the master CE. This satisfies the requirement
we set out to achieve.
</t>
			<!--
<t>
The second change is to have the FE respond to messages issued by any 
CE (including a backup CE) it is associated with. This keeps the FE
simple and as dumb as it is in the current definition.
</t>
-->

			<!--
<t>
Again for the sake of simplicity, asynchronous events and packet 
redirects continue to be sent only to the master CE. XXXX:
We need to rethink perhaps and discuss possibility of events being
sent to ALLCEIDs CEID (which the TML can translate to mean 
send-to-all-online-CES).
</t>
<t>
XXX: below paragraph needs text discussion ..
</t>
<t>
Again for the sake of simplicity, 
</t>
<t>
XXXX: We need to have an extra state for each CE (master, connected, 
associated, stats etc) on the FEPO - so probably another change 
to current FEPO components.
</t>
-->
			<!--
<t>
XXXX: What about FEs each assuming a different master CE - is that
a problem? It doesnt seem to be because what matters is how the CEs
agree between themselves who the master is. The FE responds to all
CEs.
</t>
<t>
XXXX: What other kind of traffic needs to be running between FE and backup
CEs? Heartbeats?
</t>
-->
		</section>
		</section>
		<!--
<section title="CE Fr Interface Communication" anchor="CEsync">
<t>
In this section, we define activities in the Fr interface in 
order to achieve the other two goals defined <xref target="quant"/>
<list style="symbols">
        <t>
       How fast the CE plane failure is detected.
       </t>
       <t>
         How fast the FEs recover their state and become operational.
       </t>
</list>
</t>
<section title="Basic Scope For Fr Interface" anchor="CEsyncscp">
<t>
In the Fr plane we expect to see liveliness detection and configuration.
</t>
<t>
In the case of a fault of a master CE being detected by liveliness, we 
expect there is going to be an election to choose a new master CE.
</t>
<t>
It is also expected that the master CE will be updating the backup CEs
via configuration on any necessary NE state changes.
</t>
<t>
Our goal is to keep the Fr interface simple. For this reason, our
scope is not very ambitious and tries as much as possible to maintain
current ForCES architecture:
<list style="symbols">
  <t>
   We keep the number of active CEs at 1 and backup CEs at an arbitrary
   number, N. This is also known as 1+N setup which is also the currently
   defined ForCES architectural setup. So no changes there.
  </t>
  <t>
   Define that the protocol for the Fr protocol for both liveliness
   and synchronization be the current ForCES protocol. If there are
   any changes to be made they should be very minimal.
  </t>
  <t>
   Define the use of the ForCES model as the way to describe what data
   and events are synchronized in the Fr interface. The LFB model is
   sufficient to describe components that the ForCES protocol could act on.
   We keep the state synchronization between CEs limited only to what
   the CE-FE (Fp) plane exchange and not anything else.
  </t>
  <t>
   Keep the CE set static and known at FEM/CEM configuration time and
   build a very simple CE master election process.
  </t>

</list>
</t>

<t>
In this section, we start by assuming the ForCES architecture (protocol
and model) and then extend it when necessary.
</t>
<section title="Fr Interface Operational Approach" anchor="CEsyncop">
<t>
Each CE on bootup knows the NE CE set as configured by the CEM. This
static approach greatly simplifies discovery. It is expected in most
operational setups, there will be one active and one backup CE.
</t>
<t>
Each backup CE does a ForCES association to the listed master CE.
</t>
<t>
The master CE updates backup CEs with configuration necessary to mantain
ForCES related NE state.
</t>
</section>

<section title="Fr Interface Liveliness protocol" anchor="CEsyncpr">
<t>
The ForCES protocol already has built-in heartbeats for liveliness detection.
If we define a CEPO LFB, in the same spirit as the FEPO LFB,
it should be sufficient to have ForCES act as the liveliness protocol in 
the Fr plane.  
</t>
<t>
XXX: We need to be very clear on what
is needed and reused from ForCES protocol.
XXX: What details does the CEPO carry? Example that seems to make sense:
What CE type (eg master/slave), Status (connected etc), Connectivity 
parameters, Dead intervals etc
</t>
</section>
<section title="Fr Interface Data Synchronization" anchor="CEsyncdt">
<t>
Most existing NE implementations in the industry run some
hot standby proprietary scheme. They synchronize many things using such a
scheme. Example they keep protocol state of things like OSPF, BGP, IKE etc.
We don't want to do that. 
</t>
<t>
We focus on a scope that specifies only the need
to migrate state and maybe configuration that is maintained by the CE on 
behalf of CE-FE plane. Not anything else. To be specific:
A master CE synchronizes to backup CEs
any state updates that happen on the CE-FE plane that it controls.
</t>
<t>
One challenge that will require an extension to the ForCES protocol is
on how to communicate (from the master CE to a backup CE) the details
about an LFB component state change that happened in a specific FE.
</t>
<t>
We propose to introduce a new protocol TLV at the same hierachy level as LFB 
selector. Operationally, this TLV will define that a set of state changes
that happened apply to a specific FE. For this reason it will encompass the 
FEID on which the update happened on. We call it the applies-to TLV.
</t>
<t>
Lets say an update has happened (or depending on update scheme needs to 
happen) on FE z, LFB-a/instance-b/path-c from the controlling CE x, then
the synchronization method to backup CE y will be in the form of a config
message from master CE x to backup CE y that will have a message 
source CEID of x and destination CEID of y.
The  applies-to TLV will contain FEID z. The rest of the
message will be exactly as if the CE x had sent a config message to FE z
and will contain  the path LFB-a/instance-b/path-c
</t>
<t>
XXX: Refer to IETF 77 presentation slide 11 for choices on how to do CECE
synchronization in conjunction with FEs. The consensus seems to lean on
the second scheme..
</t>
</section>
<section title="Fr Interface Election" anchor="CEsyncel">
<t>
Upon failure detection of the master CE, a very simple election occurs.
The CE with the lowest CEID wins. Operationally, all CEs associate to the 
next lowest CEID. This is easy to execute since the static CE list never 
changes.
</t>
<t>
XXX: Optimize - the master CE could keep tabs on which backup CEs are
alive and update the associated CEs CEPO table with status info
so this way if the next lowest CE is not alive, theres no point in
connecting to it when the master fails...
</t>
</section>


</section>

</section>
-->
		<!--
<section anchor="contrib" title="Contributors">
<t>
Jamal Hadi Salim has contributed to discussions that created
this document.
</t>
</section>
-->
		<section anchor="IANA" title="IANA Considerations">
			<t>
	TBA
</t>
		</section>
		<section anchor="Security" title="Security Considerations">
			<!--
-->
			<t>
	TBA
</t>
			<!-- 
-->
		</section>
		<!--

    <section anchor="Acknowledgements" title="Acknowledgements">
	    <t>
	TBA
</t>
    </section>
-->
	</middle>
	<back>
		<references title="Normative References">
    &rfc5810;
    </references>
		<references title="Informative References">
    &rfc3654;
    &rfc3746;
    &rfc5812;
    </references>
	</back>
	<section anchor="Appendix" title="Appendix I - New FEPO version">
	<t>XXX: Describe this to conform to LFB extensions as prescribed in the model</t>
		<figure>
			<artwork align="center"><![CDATA[
<LFBLibrary xmlns="http://ietf.org/forces/1.0/lfbmodel"
     xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
     xsi:schemaLocation="http://ietf.org/forces/1.0/lfbmodel D:\Workspace\ForCES\XML\LFBSchema.xsd"
     provides="FEPO">
   <!-- XXX  -->
     <dataTypeDefs>
        <dataTypeDef>
           <name>CEHBPolicyValues</name>
                  <synopsis>
                      The possible values of CE heartbeat policy
                  </synopsis>
              <atomic>
              <baseType>uchar</baseType>
              <specialValues>
                 <specialValue value="0">
                   <name>CEHBPolicy0</name>
                   <synopsis>
                        The CE heartbeat policy 0
                   </synopsis>
                   </specialValue>
                 <specialValue value="1">
                    <name>CEHBPolicy1</name>
                    <synopsis>
                         The CE heartbeat policy 1
                    </synopsis>
                 </specialValue>
               </specialValues>
               </atomic>
         </dataTypeDef>
         <dataTypeDef>
            <name>FEHBPolicyValues</name>
                 <synopsis>
                     The possible values of FE heartbeat policy
                </synopsis>
              <atomic>
              <baseType>uchar</baseType>
              <specialValues>
                <specialValue value="0">
                  <name>FEHBPolicy0</name>
                  <synopsis>
                       The FE heartbeat policy 0
                  </synopsis>
                </specialValue>
                <specialValue value="1">
                   <name>FEHBPolicy1</name>
                   <synopsis>
                        The FE heartbeat policy 1
                   </synopsis>
                  </specialValue>
               </specialValues>
               </atomic>
         </dataTypeDef>
         <dataTypeDef>
         <name>FERestartPolicyValues</name>
               <synopsis>
                   The possible values of FE restart policy
               </synopsis>
              <atomic>
              <baseType>uchar</baseType>
              <specialValues>
                 <specialValue value="0">
                   <name>FERestartPolicy0</name>
                   <synopsis>
                        The FE restart policy 0
                   </synopsis>
                   </specialValue>
               </specialValues>
               </atomic>
         </dataTypeDef>
         <dataTypeDef>
         <name>CEFailoverPolicyValues</name>
               <synopsis>
                   The possible values of CE failover policy
               </synopsis>
              <atomic>
              <baseType>uchar</baseType>
              <specialValues>
                <specialValue value="0">
                   <name>CEFailoverPolicy0</name>
                   <synopsis>
                        The CE failover policy 0
                        No High Availability or Graceful Restart.
                   </synopsis>
                 </specialValue>
               <specialValue value="1">
                  <name>CEFailoverPolicy1</name>
                  <synopsis>
                       Graceful Restart
                  </synopsis>
                </specialValue>
                <specialValue value="2">
                   <name>CEFailoverPolicy2</name>
                   <synopsis>
                        High Availability without Graceful Restart
                   </synopsis>
                 </specialValue>
               <specialValue value="3">
                  <name>CEFailoverPolicy3</name>
                  <synopsis>
                       High Availability with Graceful Restart
                  </synopsis>
                </specialValue>
               </specialValues>
               </atomic>
         </dataTypeDef>
        <dataTypeDef>
           <name>FEHACapab</name>
                  <synopsis>
                      The supported HA features
                  </synopsis>
              <atomic>
              <baseType>uchar</baseType>
              <specialValues>
                 <specialValue value="0">
                   <name>GracefullRestart</name>
                   <synopsis>
                        The FE supports Graceful Restart
                   </synopsis>
                   </specialValue>
                 <specialValue value="1">
                    <name>HA</name>
                    <synopsis>
                         The FE supports HA
                    </synopsis>
                 </specialValue>
               </specialValues>
               </atomic>
         </dataTypeDef>
         <dataTypeDef>
         <name>CEStatusType</name>
			 <synopsis>
				 Status values. Status for each CE.
			 </synopsis>
			 <atomic>
			 <baseType>uchar</baseType>
              <specialValues>
                 <specialValue value="0">
                   <name>Disconnected</name>
                   <synopsis>
                        No connection attempt with the CE yet.
                   </synopsis>
                   </specialValue>
                 <specialValue value="1">
                    <name>Connected</name>
                    <synopsis>
                         The FE has connected with the CE.
                    </synopsis>
                 </specialValue>
                 <specialValue value="2">
                   <name>Associated</name>
                   <synopsis>
                        The FE has associated with the CE.
                   </synopsis>
                   </specialValue>
                 <specialValue value="3">
                    <name>Lost_Connection</name>
                    <synopsis>
                         The FE was associated with the CE 
                         but lost the connection.
                    </synopsis>
                 </specialValue>
                 <specialValue value="4">
                    <name>Unreachable</name>
                    <synopsis>
                         The CE is deemed as unreachable by the FE.
                    </synopsis>
                 </specialValue>
               </specialValues>
			 </atomic>
         </dataTypeDef>
         <dataTypeDef>
			 <name>AllCEType</name>
			 <synopsis>
				 Table Type for AllCE component.
			 </synopsis>
			<struct>
				<component componentID="1">
					<name>CEID</name>
					<synopsis>ID of the CE</synopsis>
					<typeRef>uint32</typeRef>
				</component>
				<component componentID="2">
					<name>CEStatus</name>
					<synopsis>Status of the CE</synopsis>
					<typeRef>CEStatusType</typeRef>
				</component>
			</struct>
         </dataTypeDef>
     </dataTypeDefs>
     <LFBClassDefs>
       <LFBClassDef LFBClassID="2">
         <name>FEPO</name>
         <synopsis>
            The FE Protocol Object
         </synopsis>
         <version>2.0</version>
     <components>
           <component componentID="1" access="read-only">
             <name>CurrentRunningVersion</name>
             <synopsis>Currently running ForCES version</synopsis>
             <typeRef>uchar</typeRef>
           </component>
           <component componentID="2" access="read-only">
             <name>FEID</name>
             <synopsis>Unicast FEID</synopsis>
             <typeRef>uint32</typeRef>
           </component>
           <component componentID="3" access="read-write">
              <name>MulticastFEIDs</name>
              <synopsis>
                 the table of all multicast IDs
              </synopsis>
              <array type="variable-size">
               <typeRef>uint32</typeRef>
              </array>
           </component>
           <component componentID="4" access="read-write">
             <name>CEHBPolicy</name>
             <synopsis>
              The CE Heartbeat Policy
             </synopsis>
             <typeRef>CEHBPolicyValues</typeRef>
           </component>
           <component componentID="5" access="read-write">
             <name>CEHDI</name>
             <synopsis>
               The CE Heartbeat Dead Interval in millisecs
             </synopsis>
             <typeRef>uint32</typeRef>
           </component>
           <component componentID="6" access="read-write">
             <name>FEHBPolicy</name>
             <synopsis>
               The FE Heartbeat Policy
             </synopsis>
             <typeRef>FEHBPolicyValues</typeRef>
           </component>
           <component componentID="7" access="read-write">
             <name>FEHI</name>
             <synopsis>
               The FE Heartbeat Interval in millisecs
             </synopsis>
             <typeRef>uint32</typeRef>
           </component>
           <component componentID="8" access="read-write">
             <name>CEID</name>
             <synopsis>
                The Primary CE this FE is associated with
             </synopsis>
             <typeRef>uint32</typeRef>
           </component>
           <component componentID="9" access="read-write">
              <name>AllCEs</name>
              <synopsis>
                 The table of all CEs.
              </synopsis>
              <array type="variable-size">
               <typeRef>AllCEType</typeRef>
              </array>
           </component>
           <component componentID="10" access="read-write">
             <name>CEFailoverPolicy</name>
             <synopsis>
               The CE Failover Policy
             </synopsis>
             <typeRef>CEFailoverPolicyValues</typeRef>
           </component>
           <component componentID="11" access="read-write">
             <name>CEFTI</name>
             <synopsis>
               The CE Failover Timeout Interval in millisecs
             </synopsis>
             <typeRef>uint32</typeRef>
           </component>
           <component componentID="12" access="read-write">
             <name>FERestartPolicy</name>
             <synopsis>
                The FE Restart Policy
             </synopsis>
             <typeRef>FERestartPolicyValues</typeRef>
           </component>
           <component componentID="13" access="read-write">
             <name>LastCEID</name>
             <synopsis>
                The Primary CE this FE was last associated with
             </synopsis>
             <typeRef>uint32</typeRef>
           </component>
         </components>
        <capabilities>
           <capability componentID="30">
              <name>SupportableVersions</name>
              <synopsis>
                 the table of ForCES versions that FE supports
              </synopsis>
              <array type="variable-size">
               <typeRef>uchar</typeRef>
              </array>
           </capability>
           <capability componentID="31">
              <name>HACapabilities</name>
              <synopsis>
                 the table of HA capabilities the FE supports
              </synopsis>
              <array type="variable-size">
               <typeRef>FEHACapab</typeRef>
              </array>
           </capability>
         </capabilities>
         <events baseID="61">
           <event eventID="1">
             <name>PrimaryCEDown</name>
             <synopsis>
                 The primary CE has changed
             </synopsis>
             <eventTarget>
                 <eventField>LastCEID</eventField>
             </eventTarget>
             <eventChanged/>
             <eventReports>
                <eventReport>
                  <eventField>LastCEID</eventField>
                </eventReport>
             </eventReports>
           </event>
           <event eventID="2">
             <name>HAPrimaryCEDown</name>
			 <synopsis>The primary CE has changed</synopsis>
			 <eventTarget>
			   <eventField>LastCEID</eventField>
			 </eventTarget>
			 <eventChanged/>
			 <eventReports>
			   <eventReport>
			     <eventField>CEID</eventField>
				 <eventField>LastCEID</eventField>
			   </eventReport>
			  </eventReports>
			</event>
         </events>
       </LFBClassDef>
     </LFBClassDefs>
   </LFBLibrary>
]]></artwork>
		</figure>
	</section>
</rfc>

