This file is linked with the documentation ! */

//---------------------------------------------------------------------------
// File:   C:\pic\GPSDO\gpsdo_pic_main.c
// Author: Wolfgang Buescher, DL4YHF
// Date:   2016-02-12
// Development System :  Microchip MPLAB IDE v8.85,
//                       XC8 C Compiler ("Free Mode") V1.35,
//         later also "MPLAB X" because "MPLAB" debugger is severely bugged.
//                 To test the algorithms *without* MPLAB,
//                 Borland C++ or any decent C compiler will do.
//
//
//---------------------------------------------------------------------------

//
//
// Literature: [PIC16F1783 DS] : PIC16F1782/3 datasheet, DS40001579E,
//             saved as   C:\datasheets\pic\PIC16F1782_3_datasheet.pdf
// REVISIONS: (latest entry first)
// 2016-02-13: Due to the incredibly poor (some say 'deliberately bloated')
//             code produced by XC8 V1.35 "free", decided to give CC5X (free)
//             another chance, and adapted everything to compile with
//             CC5X besides XC8 and Borland C++ Builder.
//   But CC5X immediately whined about multiple C files in one project :
//       > Warning[1] .. : Relocatable ASM and MPLINK is not recommended
//       >                 (see README.TXT)
//   and (story told in CC5X's LINKER.TXT, not README.TXT): 
//       > Currently it is best to use a single C module for several reasons.
//       > MPLINK support was mainly offered to enable asm modules to be added.
//       > Limitations when using MPLINK:
//       >   1. Asm mode debugging only (C source code appear as comments)
//              (WB: heavens, no. Not again. We want true SOURCE LEVEL debugging.)
//       >   2. Multiple C modules does not allow the static local variable
//       >      stack to be calculated for the whole program, meaning that much
//       >      more RAM space will be used for local variables.
//       >   3. Call level checking must be done manually
//       >   4. Computed goto will be slower because the compiler 
//       >      can not check 256 byte address boundary crossing.
//       >   5. Inefficient RAM bank updating, meaning mode code.
//    WB: Ok, "message understood". So back to stuffing
//       'all we need' (in the C part) INSIDE A SINGLE MODULE.
//        To avoid having to turn the main module (gpsdo_pic_main.c)
//        into a bulky monster, the extra C source modules (ADC, UART,..?)
//        are now #included as if they were HEADER files.
// 2016-02-14 : gave up, no 16-bit SFR accesses in CC5X,
//              smaller but NON-FUNCTIONAL code. 
//             Thrown out ! Will never try this again. CC5X + MPLABX = waste of time.
//
// 2016-01-27: Removed many notes from the "brainstorming phase"
//             from this sourcecode, and dumped them into the
//             HTML file.
// 2015-12-21: Started project, brainstorming phase, initial tests
//             because looking at the PIC16F1782/3 Errata, this chip
//             seems to have a couple of bugs .
//
//*************************************************************************

#include "switches.h" // project specific 'compiler' switches & options
    // (It seems impossible to define the include files in MPLAB-X
    //  and pass those settings on to the "custom translator" CC5X.
    //  So, like it or not, everything (*.c, *.h) had to be dumped
    //  into the stupid MPLAB-X 'project directory'. What a mess.)


// Include an awful lot of compiler-specific junk ...
#ifdef __BORLANDC__  // compiling with Borland C ? Use WB's "PIC emulator" ..
# include "pic_emulator/xc.h"
#elif (defined __XC8) // compiling for PIC, using Microchip's "XC8" compiler ?
# include "xc.h"
 // What a mess. "xc.h" includes "htc.h" .
 //              "htc.h" includes "hc.h" if not already included. Wow.
 //              "htc.h" also includes "cci.h", whatever that stands for.
 //              "htc.h" also includes "xc8debug.h", but "xc8debug.h" contains almost nothing.
 //              "cci.h" also includes "__at.h" . Yet another intuitive name.
 // Which of the above obfuscated headers actually includes "pic16lf1783.h" ?
 // #include <pic16lf1783.h>  // No-No ! (included from xc.h, whatever 'xc' means)
# include "stdint.h"          // WB surrendered, now using 'uint8_t' instead of BYTE, etc
#else // neither Borland, nor XC8; (forget about CC5X ... it's NOT A C COMPILER)
# error Your compiler is not supported here yet. Please add support yourself.
#endif

#include "uart_pic.h" // "UART functions for PIC" by DL4YHF
#include "adc_pic.h"  // ADC initialisation, may also be PIC-specific

#include "gpsdo.h"    // header for THIS module


//---------------------------------------------------------------------------
// PIC16LF1783 Configuration Bit Settings
//---------------------------------------------------------------------------

// #pragma config statements should precede project file includes.
//   Use project enums instead of #define for ON and OFF.
//   Support for PIC16(L)F1783 was added in DL4YHF's WinPic,
//   so the settings for CONFIG WORD 1 and CONFIG WORD 2,
//   which are hopefully transferred into the HEX file by voodoo magic,
//   can be examined in WinPic (via Microchip's *.dev file) .

// CONFIG1 .  Details in PIC16(L)F1782/3 DS40001579, page 40 of 434 ...
#pragma config FOSC = ECH       // Oscillator Selection: ECH = External Clock, High Power Mode (4-32 MHz), fed into CLKIN
#pragma config WDTE = OFF       // Watchdog Timer Enable ? (don't.. this firmware doesn't feed it)
#pragma config PWRTE = ON       // Power-up Timer Enable ? (advisable when there is no external reset circuitry)
#pragma config MCLRE = ON       // MCLR Pin Function Select (MCLR/VPP pin function is MCLR)
#pragma config CP = OFF         // Flash Program Memory Code Protection (Program memory code protection is disabled)
#pragma config CPD = OFF        // Data Memory Code Protection (Data memory code protection is disabled)
#pragma config BOREN = ON       // Brown-out Reset Enable (Brown-out Reset enabled)
#pragma config CLKOUTEN = OFF   // Clock Out Enable (CLKOUT function is disabled; the CLKOUT pin is an I/O or oscillator pin)
#pragma config IESO = OFF       // Internal/External Switchover (Internal/External Switchover mode is disabled)
#pragma config FCMEN = ON       // Fail-Safe Clock Monitor Enable (Fail-Safe Clock Monitor is enabled)

// CONFIG2 .  Details in PIC16(L)F1782/3 DS40001579, page 42 of 434 ...
#pragma config WRT = OFF        // Flash Memory Self-Write Protection (Write protection off)
#pragma config PLLEN = ON       // PLL Enable (4x PLL enabled; 10 MHz external clock -> 40 MHz, slightly violating the spec)
#pragma config STVREN = ON      // Stack Overflow/Underflow Reset Enable (Stack Overflow or Underflow will cause a Reset)
#pragma config BORV = LO        // Brown-out Reset Voltage Selection (Brown-out Reset Voltage (Vbor), low trip point selected)
#pragma config LPBOR = OFF      // Low Power Brown-Out Reset Enable Bit (Low power brown-out is disabled)
#pragma config LVP = ON         // Low-Voltage Programming Enable (Low-voltage programming enabled)
                                // NOTE(review): the ICSP notes in the port-assignment diagram mention +9 V on
                                //   MCLR/Vpp and list the adapter's LVP pin as "nc" - confirm that LVP = ON is intended.


//;**************************************************************************
//;                                                                         *
//; Port assignments                                                        *
//;                                                                         *
//;**************************************************************************
//
// Connections / circuit principle with 'typical' voltages (DC, average)
//                                            
//                   1.8V              3.6V     6.4V    10V
//        +3.6 V          __       __       ___     __       __
//          O         .--|__|--*--|__|--*--|___|---|__|--*--|__|--O + 12 V
// RGB-LED  |      610|   1k   |   2k2  |   /|\5k   4k7  |   4k7    (stable)
// indicator|      Hz,|50%   __|__    __|__  |(10       ---.
//     .----*----.    |PWM   _____    _____  | turn     /|\ 10 V
//    B|   G|   R|    |duty  10|uF   4.7|uF  | trim-     |  reference
//    ---  ---  ---   |cycle  _|_      _|_   | pot)     _|_ or Zener
//    /|\  /|\  /|\   |       ____           | 5V                      ICSP (Mclr/
//     |    |    |   PWM for |VC  |<---------          an.out an.in     |    Vpp)
//    .-.  .-.  .-.  V-Ctrl  |OCXO| Vctrl   ADC 80kHz    |   (I +  Q)  .-.
//    | |  | |  | |  /|\     |____|         sampling/   /|\   |    |   | |
//    |_|  |_|  |_|   |    10MHz|   ___     speed test   |    |    |   |_|
//     |1k  |2k  |2k  |        \|/   |        /|\        |   \|/  \|/   |
//     |    |    |    |         |    |         |         |    |    |    |
//     _    _    _    _    _    _    _    _    _    _    _    _    _    _
//    | |  | |  | |  | |  | |  | |  | |  | |  | |  | |  | |  | |  | |  | |
//   ----------------------------------------------------------------------
//  | 14   13   12   11    10   9    8    7    6    5    4    3    2    1  |
//  | RC3  RC2  RC1  RC0/ RA6  OSC1 GND  RA5  RA4  RA3  RA2  RA1  RA0 MCLR*|
//  |               PSMC1A    [RA7]                    /DAC /AN1 /AN0    __|
//  |                                                   OUT             -  |     
//  |                      PIC16(L)F1783, "SPDIP-28"                   /   |
//  |                                                                  \   |
//  |                                                         ICSPCLK/  -__|
//  |         PSMC2A/              CCP1/                          TXD/ RXD/|
//  | RC4  RC5  RC6  RC7  GND +Vcc  RB0  RB1  RB2  RB3  RB4  RB5  RB6  RB7 |
//  | 15   16   17   18   19   20   21   22   23   24   25   26   27   28  |
//   ----------------------------------------------------------------------
//    |_|  |_|  |_|  |_|  |_|  |_|  |_|  |_|  |_|  |_|  |_|  |_|  |_|  |_|
//
//               |    |    |    |    |                             |    |
//               |    |    |    |   /|\                            |    |
//               |    |   _|_   |    |                            \|/  /|\ .
//              \|/  \|/        |   GPS                            |    |
//               |    ?      +3.6V  Sync                          UART and
//               |           low-   (1Hz)                        programming
//          13.33333 MHz     noise    _                           adapter (ICSP)
//          for SDR-IQ              _|                    ICSP adapter pins :
//         ('programmable                                  1 = MCLR/Vpp  2 = Vcc
//           frequency output',                            3 = GND       4 = PGD/RXD
//       PFO : 40 MHz / N without FFA,                     5 = PGC/TXD   6 = LVP (nc)
//             20 MHz / (N + M/16) with FFA)
//
// (*) MCLR/Vpp has an internal pull-up resistor, so just leave it
//     unconnected during normal operation. The ICSP adapter will
//     set MCLR/Vpp to +9 V (!) for the PIC16LF1783 to enter programming mode.
//
//  An overview of ALL alternate port functions is in the PIC16F1782/3 
//  datasheet, DS40001579E, "28-PIN ALLOCATION TABLE" on page 5 .
//  Some of the alternate pin functions must be assigned via APFCON !
// 
// Port A: data direction mask and initial output data.
//   In PORTA_DIRECTIONS a '1' bit makes the pin an input, a '0' bit an output,
//   so only RA0 and RA1 (the analog inputs AN0 / AN1) are inputs here.
//   (Presumably copied into TRISA / PORTA by init code that is not part of
//    this section - TODO confirm.)
//   Pins marked with asterisks in the table are the functions actually used.
#define PORTA_DIRECTIONS  0b00000011  // port A I/O mode (L=out, H=in !)
#define PORTA_INIT_DATA   0b00000000  // port A initial data (shortly after power-on)
                     //     ||||||||_ RA0 / *AN0*
                     //     |||||||__ RA1 / *AN1*
                     //     ||||||___ RA2 / AN2 / VREF- / DACOUT1
                     //     |||||____ RA3 / AN3 / VREF+ / DACVREF+ 
                     //     ||||_____ *RA4* / T0CKI (here: ADC sampling clock test output)
                     //     |||______ RA5 / AN4
                     //     ||_______ RA6 / OSC2/CLKOUT
                     //     |________ RA7 / PSMC1CLK / PSMC2CLK / OSC1/**CLKIN**
// Test-pin helpers. Each macro expands to a bare assignment WITHOUT a trailing
// semicolon and without parentheses: use it as a statement of its own
// ( "IOP_ADCCLK_PIN_HI;" ), do not embed it in a larger expression.
#define IOP_DEBUG_PIN1_LO PORTAbits.RA6=0 // RA6 low  : used for debugging (via scope)
#define IOP_DEBUG_PIN1_HI PORTAbits.RA6=1 // RA6 high : used for debugging (via scope)
#define IOP_ADCCLK_PIN_LO PORTAbits.RA4=0 // RA4 low  : also used for debugging (via scope)
#define IOP_ADCCLK_PIN_HI PORTAbits.RA4=1 // RA4 high : set at the start of the ADC interrupt handler

// Port B: data direction mask and initial output data.
//   '1' = input, '0' = output: RB0 (GPS sync pulse) and RB7 (RXD) are inputs,
//   everything else (including RB6 = TXD) is an output.
#define PORTB_DIRECTIONS  0b10000001  // port B I/O mode (L=out, H=in !)
#define PORTB_INIT_DATA   0b00000000  // port B initial data
                     //     ||||||||_ RB0 / AN12 / PSMC1IN / **CCP1** (input for GPS sync pulse)
                     //     |||||||__ RB1 / AN10 / OPA2OUT
                     //     ||||||___ RB2 / AN8  / OPA2IN-
                     //     |||||____ RB3 / AN9  / OPA2IN+ / CCP2 
                     //     ||||_____ RB4 / AN11 
                     //     |||______ RB5 / AN13 / T1G / SDO
                     //     ||_______ RB6 /         / *TXD* (out) / SDI / *ICSPCLK*
                     //     |________ RB7 / DACOUT2 / *RXD* (in)  / SCK / *ICSPDAT*
// Current logic level on RB0, the pin that receives the GPS sync pulse:
#define IOP_GPS_SYNC_ACTIVE PORTBbits.RB0


// Port C: all eight pins are outputs ('0' = output in the direction mask).
//   The initial data drives RC1..RC3 high, which keeps all three LEDs off
//   (the LEDs light up when the pin is LOW); RC0 and RC6 also start high.
#define PORTC_DIRECTIONS  0b00000000  // port C I/O mode (L=out, H=in !)
#define PORTC_INIT_DATA   0b01001111  // port C initial data 
                     //     ||||||||_ RC0 / **PSMC1A** : 16-bit PWM for OCVCXO frequency control
                     //     |||||||__ *RC1* / PSMC1B / CCP2 (red LED, high=off)
                     //     ||||||___ *RC2* / PSMC1C / CCP1 (green LED, high=off)
                     //     |||||____ *RC3* /PSMC1D/SCK/SCL (blue LED, high=off)
                     //     ||||_____ RC4 / PSMC1E / SDI / SDA (i2c)
                     //     |||______ RC5 / PSMC1F / SDO (spi)
                     //     ||_______ RC6 / **PSMC2A** / (TXD)
                     //     |________ RC7 / PSMC2B     / (RXD)
                     
// LED helpers. Each macro is a bare assignment without a trailing semicolon:
// use it as a separate statement, e.g. "IOP_RED_LED_ON;".
#define IOP_RED_LED_ON    PORTCbits.RC1=0  // RGB-LED (common anode) : 0 = ON  
#define IOP_RED_LED_OFF   PORTCbits.RC1=1  // RGB-LED (common anode) : 1 = off
#define IOP_GREEN_LED_ON  PORTCbits.RC2=0  // RGB-LED (common anode) : 0 = ON
#define IOP_GREEN_LED_OFF PORTCbits.RC2=1  // RGB-LED (common anode) : 1 = off
#define IOP_BLUE_LED_ON   PORTCbits.RC3=0  // RGB-LED (common anode) : 0 = ON
#define IOP_BLUE_LED_OFF  PORTCbits.RC3=1  // RGB-LED (common anode) : 1 = off 

// Some bits on PORTA (from an old project with a PIC16F873A) remain available to communicate
// serially with the PLL synthesizer chip. THREE of those pins are required for the PMB2306.
// NOTE(review): in THIS project RA0 and RA1 are configured as inputs (AN0 / AN1) by
//   PORTA_DIRECTIONS, and RA2 / RA3 are listed with DAC / VREF functions - these
//   definitions look like leftovers from the old project; confirm before using them.
// The lines marked with '>' are quotations from the PMB2306 documentation.

#define IOP_PLL_EN   PORTAbits.RA0 // serial "enable" .  Pin "EN" at the PMB2306T (P-DSO14).
    // (The original note said 'Pin 4 ("DA")', identical to the data line below,
    //  apparently a copy/paste slip - TODO: look up the EN pin number in the datasheet.)
    // > Enable line of the serial control with internal pull-up resistor. 
    // > When EN = H the input signals CLK and DA are disabled 
    // > internally. When EN = L the serial control is activated.
    // > The received data are transferred into the latches
    // >  with the positive edge of the EN-signal.
#define IOP_PLL_DATA PORTAbits.RA1 // serial data line.  Pin 4 ("DA") at the PMB2306T (P-DSO14).
    // > Serial data input with internal pull-up resistor.
    // > The last two bits before the EN-signal define the destination address.
    // > In a byte-oriented data structure the transmitted data have to end
    // > with the EN-signal, i.e. bits to be filled in (don't care)
    // > are transmitted first.
#define IOP_PLL_CLK  PORTAbits.RA2 // serial clock line. Pin 5 ("CLK") at the PMB2306T (P-DSO14).
    // > Clock line with internal pull-up resistor. The serial data are 
    // > read into the internal shift register with the positive edge
    // > (see pulse diagram for serial data control).
 // Other I/Os on Port A, not directly related with the PLL chip:
#define IOP_IN_RX_TX PORTAbits.RA3 // RX/TX sensing input (HI=RX, LO=TX)


//*************************************************************************
//                                                                        *
// Constants and timing parameters                                        *
//                                                                        *
//*************************************************************************

// Frequencies are in Hertz.
// These constants were carried over from an older assembler project and still
// used MPASM syntax ( "equ .40000000 ; comment" ): the macro body was not a
// valid C expression, so any use in C code would have failed to compile.
// They are plain C integer constants now. The 'L' suffix marks the values
// that need more than 16 bits (an 'int' is only 16 bits wide with XC8).
#define PIC_CLOCK_FREQ_HZ  40000000L  // processor clock frequency in Hertz (*not* the instruction cycle!)
#define PLL_REF_FREQ_HZ    10000000L  // input REFERENCE frequency for the PLL
#define PLL_MAX_R_COUNTER  5000       // max divisor for the REFERENCE frequency -> min. phase comparator frequency:
   // PLL_REF_FREQ_HZ / PLL_MAX_R_COUNTER = 10 MHz / 5000 = 2 kHz (min phase comparator frequency, important for the loop filter design).
   // (The example in the old assembler source assumed a 50 MHz reference: 50 MHz / 5000 = 10 kHz.)
#define DEFAULT_VFO_FREQ   70160000L  // default VFO frequency in Hertz
#define RECEIVE_OFFSET_HZ  3579000 // 3.579 MHz = colour burst crystal frequency


//---------------------------------------------------------------------------
// Initial EEPROM contents ... note the clumsiness of this,
//  it's not even possible to embed VARIABLES or STRUCTS in EEPROM
//  so how do we know the ADDRESSES of the stuff located by __EEPROM_DATA ?
// As usual, others stumbled across similar problems, and asked:
// >  Is there a work-around out there that will allow initialization
// >  of a specific EEPROM location, without starting at the beginning ?
// __EEPROM_DATA seems to be a MACRO. It would be interesting to find
//   its definition. But that would be asking too much (for MPLAB "IDE").
//   Use Total Commander's glorious full-text search instead..
//   for 14-bit cores, it's in PIC.H, and the macro is just TERRIBLE.
//   It can ONLY accept EXACTLY EIGHT BYTES, which must be NUMERIC,
//   no C strings, no structs, no multi-byte integer constants... eeeek ! 
//---------------------------------------------------------------------------


#if ( EEPROM_SIZE > 0 )  // do we have an EEPROM ?
  // EEPROM addresses of the 'programmable frequency output' (PFO) settings.
  // They must match the byte positions in the first __EEPROM_DATA line below.
# define EEP_ADDR_PFO_PERIOD_L 0x00  // 16-bit 'period' (divisor for the programmable frequency output), lower 8 bits
# define EEP_ADDR_PFO_PERIOD_H 0x01  // upper 8 bits. F_out = 40 MHz / (PFO_PERIOD+1), see formula below.
                                     // NOTE(review): this line used to say "(PFO_PERIOD+2)", which contradicts
                                     //   the formula below; the stored default (2) together with the
                                     //   13.33333 MHz output shown in the pin diagram suggests "+1" - confirm.
# define EEP_ADDR_PFO_FFA      0x02  //  4-bit 'FFA' (fractional frequency adjust for the PSMC)

   // Initial EEPROM contents at offset 0x00 .. 0x07 : Programmable Frequency Output:
 __EEPROM_DATA   (  2,     0,  0x00,  0x00, ' ', 'P', 'F', 'O' ); /* 0x00 .. 0x07 */
   //              |lo____hi|     |
   //                |            |
   //       PFO_PERIOD      PFO_FFA
   //  With FFA=0 : f_PFO = 40 MHz / ( 1 + PERIOD_L + 256 * PERIOD_H )
   //      Default: f_PFO = 40 MHz / ( 1 +  2 + 256 * 0 ) = approx 13.33333 MHz  (values stored above)
   //      Example: f_PFO = 40 MHz / ( 1 + 83 + 256 * 0 ) = approx 476190.4762 Hz
   //  The next eight bytes hold a zero-terminated text:
 __EEPROM_DATA   ( 'd', 'l', '4', 'y', 'h', 'f', '.', 0x00 ); /* presumably 0x08 .. 0x0F - TODO confirm */
#else
# if( ! SWI_STANDALONE_SIMULATOR )  // only when compiling for a 'real' PIC:
#  error "Wot, no EEPROM ?!"
# endif
#endif


//---------------------------------------------------------------------------
// data types : moved to gpsdo.h
//---------------------------------------------------------------------------




//---------------------------------------------------------------------------
// global variables (true "local" variables don't exist in a PIC16 ! )
//---------------------------------------------------------------------------


// General-purpose variables. The name prefix indicates the type:
//   b = uint8_t, w / u16 = uint16_t, i8 / i16 = signed 8 / 16 bit, i32 = U_4Byte.
// U_4Byte is not declared in this file (presumably a 32-bit union from gpsdo.h - TODO confirm).
uint16_t wWaitCounter; // "uint16_t" = the data type formerly known as "WORD"
uint8_t  bTemp;        // "uint8_t"  = the data type formerly known as "BYTE"
uint8_t  bMainLoopCounter;
int8_t   i8Temp;
int16_t  i16Temp;       // temporary 16-bit signed integer value
U_4Byte  i32TempX;      // general purpose 32-bit value (usually the 'left operand')
U_4Byte  i32TempY;      // general purpose 32-bit value (usually the 'right operand' and/or result)

// Lowpass filter for the oscillator's control voltage:
int16_t  i16LowpassIn;  // 16-bit input for the lowpass (running at Fs = 610 Hz)
U_4Byte  i32LowpassOut; // lowpass-filtered output (connected to the OCVCXO's "Vctrl"-input)
uint8_t  bLowpassSpeed; // 1: tau=107 seconds, 2: 53.7 s, 3: 35.8 s, 4: 26.8 s, ... 8: 13.4 s
                        // (i.e. the time constant is roughly 107 s divided by this value)
uint16_t u16VctrlBias;  // Ideally 32767 for a 50 % PWM duty cycle,
                        // determined during the initial 255-second "coarse" frequency measurement.

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// CIC filter variables (decimating filter for the analog input, not GPSDO-related) .
//  Based on an AVR(!) code sample by Bruce Land, Cornell University:
// > Filter parmeters: N=4, R=4, M=2, i.e.:
// >  -- N = order=4 (4 integrators)     [number of integrator and comb stages]
// >  -- R = downsample=4                [decimation ratio, called D in Lacoste's article]
// >  -- M = delay (after downsample) =2 [differential delay, "usually 1 but sometimes 2")]
// > Bits required is B_out = N*log2(R*M) + B_in
// >      or 4*3+16=28, so 32 bit longs should be fine .
// Or, from another article (Mercury Receiver) about CIC filters:
// > For CIC decimators, the gain G at the output of the final comb section is:
// >    Gain = (R * M) ^ N
// > Assuming two's complement arithmetic, we can use this result to calculate
// > the number of bits required for the last comb due to bit growth.
// > If B_in is the number of input bits, then the number of ouput bits, B_out, is:
// >    B_out = N * log2( R * M ) + B_in
// > It also turns out that B_out bits are needed for EACH (?) integrator and
// > comb stage. The input needs to be sign extended to B_out bits, but LSB's
// > can either be truncated or rounded at later stages.
// >
// For a 12-bit ADC: B_in = 12, N=4, R=4, M=2 .
//      B_out = 4 * log2( 4 * 2 ) + 12 = 4 * 3 + 12 = 24
//     (which means 24-bit integer arithmetic
//      may be sufficient. XC8 natively supports that type;
//      Microchip calls it 'signed short long'. )
//
// PIC-implementation specific details and notes (concerning the CIC filter):
//
//  - To avoid wasteful bank-switching in the CIC filter code,
//    all filter variables were squeezed inside one RAM bank.
//    There may be more elegant solutions than absolute addresses,
//    but the inline assembler cannot access struct components..
//
// Storage for the CIC filter (input, output, integrator and comb stages).
//   cic_t      : integer type of every filter variable
//   cic_s      : size of one cic_t in bytes (used to compute the offsets below)
//   __AT_(off) : places a variable 'off' bytes after the start of the filter's
//                RAM bank on the PIC; expands to nothing in the PC simulator.
// The offsets are consecutive multiples of cic_s, so the variables are laid out
// back-to-back in declaration order; the two uint8_t variables follow directly.
#if SWI_STANDALONE_SIMULATOR
# define __AT_(addr)  // don't even think about absolute addresses on a PC
# define cic_t long /* standard C compilers don't support 24 bit so use 32 (at least)*/
# define cic_s 4      // size of each 'cic_t' element in bytes
#else
  // Compiling for PIC16F1783 ? Try 24-bit integer, 
  // and locate the CIC filter variables in ONE BANK !
  // "signed short long" is XC8's name for 24-bit :
# if(0)
#  define cic_t long  
#  define cic_s 4      // size of each 'cic_t' element in bytes
# else
#  define cic_t signed short long  
#  define cic_s 3      // size of each 'cic_t' element in bytes
# endif
  // The variables below occupy 20 * cic_s + 2 bytes:
  //   62 bytes with cic_s = 3 (the active setting),
  //   82 bytes with cic_s = 4 - in that case downsample_cnt and fflags
  //   (offsets 80 and 81) would end up at 0x0F0 / 0x0F1, i.e. beyond the
  //   general purpose registers of bank 1 listed below.
  // Each of the PIC16F1783's RAM-banks has 80 bytes 'general purpose' registers,
  // besides the SFRs and a tiny 'common RAM' area.
  // Bank 0 : general purpose registers at 0x020 .. 0x06F;
  // Bank 1 : general purpose registers at 0x0A0 .. 0x0EF;
  // Bank 2 : general purpose registers at 0x120 .. 0x16F; etc..
  // The macro argument is not parenthesized; only pass simple products / sums as below.
  // NOTE(review): objects placed at absolute addresses with '@' may not be zeroed
  //   by XC8's startup code - confirm that the filter state is initialised
  //   explicitly somewhere (not visible in this part of the file).
# define __AT_(addr) @ addr+0x0A0
#endif
cic_t xx                 __AT_(0*cic_s);  // input to the CIC filter (written by the ADC interrupt handler)
cic_t yy                 __AT_(1*cic_s);  // output from the CIC filter (brought down to 16 bit again)
cic_t integrator1        __AT_(2*cic_s);
cic_t integrator2        __AT_(3*cic_s);
cic_t integrator3        __AT_(4*cic_s);
cic_t integrator4        __AT_(5*cic_s);
cic_t last_integrator4   __AT_(6*cic_s);
cic_t last2_integrator4  __AT_(7*cic_s);
cic_t comb1              __AT_(8*cic_s);
cic_t comb2              __AT_(9*cic_s);
cic_t comb3              __AT_(10*cic_s);
cic_t comb4              __AT_(11*cic_s);
cic_t last_comb1         __AT_(12*cic_s);
cic_t last_comb2         __AT_(13*cic_s);
cic_t last_comb3         __AT_(14*cic_s);
cic_t last_comb4         __AT_(15*cic_s);
cic_t last2_comb1        __AT_(16*cic_s);
cic_t last2_comb2        __AT_(17*cic_s);
cic_t last2_comb3        __AT_(18*cic_s);
cic_t last2_comb4        __AT_(19*cic_s);
uint8_t downsample_cnt   __AT_(20*cic_s);   // downsampling counter for the CIC filter
uint8_t fflags           __AT_(1+20*cic_s); // flags for the filter output (send where, etc)
// end of the CIC filter variables
// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -


// Debug output:
char sz80Temp[81]; // string buffer for the incredible "high-resolution" plotter
                   // (81 bytes; presumably 80 characters plus the terminating zero)
char cDebugMode;   // 0   : don't send debug-messages to the serial port (UART),
                   // 'n' : emit numeric values (for debugging),
                   // 'p' : plot some of those values (also for debugging),
                   // ...

// Variables for measurement of the OCVCXO frequency, error integral, etc :
uint8_t  bSyncPulseCounter;  // number of GPS pulses, 0..255 (stops at 255)
int16_t  i16FreqOffset;      // measured frequency MINUS 10 MHz, unit: see ProcessCapturedSyncPulse()
uint16_t wPrevCapture;       // timer value captured on PREVIOUS sync pulse
int16_t  i16ErrorIntegral;   // summed-up error value (for the controller's 'I'-part)
U_4Byte  i32ErrorIntegralIntegral; // integrated error-integral (to get not only the FREQUENCY-, but also the PHASE-error back to zero)



//---------------------------------------------------------------------------
// Interrupt handler ("there can be only one" .. on a midrange PIC..)
//---------------------------------------------------------------------------
#if SWI_STANDALONE_SIMULATOR
void ADC_Sampling_ISR(void) // for simulation, periodically called from PIC_Emulator()
#else
void __interrupt ADC_Sampling_ISR(void)  // Interrupt function, XC8 flavour.
                                         // No need to specify an 'origin' here.
#endif
{
  // XC8 automatically generates the required interrupt context saving/restoring code.
  //
  // Cycle counts below are relative to the entry (breakpoint at ADC_Sampling_ISR),
  //       where the 'Instruction Cycles' counter in MPLAB's "Stopwatch" was reset .
  // HERE (after the ISR's red tape), PIC16F1783, XC8: 5 cycles
  //
  // Since we only use the interrupt for the ADC, there's no need to check the IRQ source.
  // It would be nice to have a HARDWARE-triggered A/D conversion, but that's impossible
  //    with the remaining resources (PSMC1, PSMC2, CCP1, CCP2 could, but they're all occupied !).
  // Having a fast-sampling analog input wasn't the primary goal of the GPSDO,
  // and a PIC16F178x would have a tough time (with lots of assembler code)
  // to perform reasonable anti-aliasing anyway.
  //       The A/D converter, and its "sampling trigger" (actually Timer2 in the PIC16)
  //       is initialized in a separate module by DL4YHF - see ADC_PIC16F1783.C .
  // To test if the conversion is sufficiently jitter-free (with a scope),
  // show the interrupt handler's activity on one of the GPIO pins:
  IOP_ADCCLK_PIN_HI;    // -> BSF 0x0C, 0x04  (0x0C = &PORTA)
  // Before any conditional code (which would cause phase jitter),
  //   read the ADC result register (in BANK 1),
  //   store the result in the filter's input variable (xx, also in BANK 1),
  //   and start the next conversion :
#if ( SWI_STANDALONE_SIMULATOR )  // no inline assembler for the standalone simulator (running on a PC)...  
  xx = ADRES;    // "A/D Result" .. what a name for a register
  // ex (AVR) : xx = ((int)(ADCH)<<4) ;  // read a/d converter
  //                               |___ why ?? must be AVR-specific !
  //
  ADCON0bits.GO = 1; // tell the ADC what to do : "GO" = "start next conversion"
  // -> BSF   0x1d, 0x1 ; set the 'GO'-bit in ADCON0
  // Perform the missing sign-expansion ? Not neccessary for the simulation,
  // but implemented in assembler (further below).
#else // not SWI_STANDALONE_SIMULATOR but compiling for a real PIC uC...
# asm
  // Assembler variant of the above "C" code :
  BANKSEL(_xx)   // select the RAM-bank with the CIC filter variables *AND* 'ADRESL/H'
                 // -> pseudo instruction, in fact "MOVLB 0x1" which no-one would understand)
#if(1)  // (1) : process real data from ADC,  (0) : TEST with constant input for MPLAB-SIM
  MOVF  BANKMASK(ADRESL), W  // read ADRESL (A/D conversion result, low byte)
  MOVWF BANKMASK(_xx+0)      // store in bits 7..0 of variable 'xx' (filter input)
  MOVF  BANKMASK(ADRESH), W  // read ADRESH (A/D conversion result, high byte)
  MOVWF BANKMASK(_xx+1)      // store in bits 15..8 of variable 'xx' (filter input)
# if(0) // failed attempt to 'synchronize' the ADC clock prescaler by turning the ADC off and on again
        // (the ADC clock precaler doesn't seem to be reset by this)
  CLRF  BANKMASK(ADCON0)     // turn the ADC off 
  BSF   BANKMASK(ADCON0), 0  // turn the ADC on again
  NOP   // > The GO/DONE bit should not be set in the same instruction that turns on the ADC
# endif // turn the ADC off and on again to clear its prescaler ?
  BSF   BANKMASK(ADCON0), 1  // start next conversion as early as possible (bit 1 = "GO")
                             // (the current drawn by the sample and hold input caused
							 //  a 400 ns long 'spike' on the active analog input, 
							 //  which should appear a constant time after the rising edge
							 //  from 'IOP_ADCCLK_PIN_HI'. But the time was NOT constant :
        //                          __________________________________       _
        // Test pin ("ADCCLK") : __|                                  |_..._|
        //                       ____  _______________________________________
        // Analog input voltage:     |/
        //                       __________  _________________________________
        //    another time:                |/
        //                       ______________________  _____________________
        //    another time:                            |/
        //                         | |     |           |                    |
        //    delta t (ns):        0 36(!) 436         2436 ns         12500 ns
        //  
        // -> See 'ADC speed test' in C:\pic\GPSDO\ADC_PIC16F1783.c !
        //   (A 12-bit A/D conversion seemed to require 17, not 15, "T_AD" cycles)
        //    1 / ( 17 * 0.8 us) = max. 73.52941176 kHz.... )
#else    // test with constant input; watch the filter output (yy) in MPLAB-SIM !
  MOVLW ( -10  & 255)        // dummy value replacing ADRESL (ADC result, low byte)
  MOVWF BANKMASK(_xx+0)      // store in bits 7..0 of variable 'xx' (filter input)
  MOVLW ( -10  >> 8)         // dummy value replacing ADRESH (ADC result, high byte)
  MOVWF BANKMASK(_xx+1)      // store in bits 15..8 of variable 'xx' (filter input)
#endif
#if( SWI_ADC_BITS_PER_SAMPLE==10 ) // PIC16F1783: ADC configured for 10 bits/sample ?
  // Shift the A/D converter result (already in 'xx') two bits to the left
  // to leave the code further below (which expects a "12-bit ADC") unchanged.
  // (Running the ADC at 10 instead of 12 bit resolution saves TWO 'T_AD' cycles).
  // Using 10 bits only was -more of less- just a test to find out
  //  if 12 bit really gives 12 bit more dynamic range, or if the
  //  ADC in the 'slightly overclocked PIC16F1783' is too noisy anyway,
  //  and if a MUCH FASTER dsPIC with 10 bit ADC would be better than
  //         a SLOWER ADC (in another dsPIC) with 12 bit resolution. )
  // See DS page 143: in "10-bit 2's compliment" mode, result was in ADRES bits 9..0,
  //         so shift 'xx' two times left to have the msbit in bit 11 again:
  LSLF BANKMASK(_xx+0)       // bit 7->CARRY, bits 6..0 -> bits 7..1, clear bit 0
  RLF  BANKMASK(_xx+1)       // bits 14..8 -> bits 15..9, CARRY-> bit 8
  LSLF BANKMASK(_xx+0)       // 2nd 16-bit "left-shift" ..
  RLF  BANKMASK(_xx+1)       // e.g. multiply 16-bit value by four in 4 instruction cycles..
                             // we can JUST afford doing that HERE, at fs_in :)
#endif // ADC running in 10-bit conversion mode ?
  //							 
  // Strange relation between the input voltage (fed into pin 'AN0')
  //                  and the result (in ADRES) :
  //   Vin   | 0.00 V     | 1.024 V        | 2.048 V (=Vref) | > 2.048 V
  //  -------+------------+----------------+-----------------+------------
  //   ADRES | -5..+5 (!) | 2047 +/- noise | 4093 ... 4095   | 4095 (saturated)
  //           |
  //           |__ no typo, ADRES was "slightly negative" sometimes !
  //               (would the '12 bit ADC' provide 13 bits with differential input?)
  //
  // Vin will be biased with 1.024 V input (half reference voltage
  //                        as selected per 'FVRCON' in ADC_Init() ),
  // so the above offset must be subtracted later (to save clock cycles: AFTER decimation).
  // 
  // From the PIC16F1782/3 datasheet, page 143 :
  // > Two's complement is right justified with the sign
  // > extended into the most significant bits .
  // To avoid an overflow in the CIC filter calculation (which only supports 12-bit input),
  // avoid negative input from ADRES here:
  BTFSS BANKMASK(_xx+1),7  // bit 15 in 'xx' set ? suprise surprise ...
  GOTO  xx_not_negative
  CLRF  BANKMASK(_xx+0)    // -> clear xx bits 7..0
  CLRF  BANKMASK(_xx+1)    // -> clear xx bits 15..8
xx_not_negative:           // should get HERE when the input is properly biased (and not clipping)
  CLRF  BANKMASK(_xx+2)    // clear bits 23..16 of 'xx' (no SIGN EXPANSION required anymore)
# endasm
#endif // SWI_STANDALONE_SIMULATOR ?


  //========================================================
  // Below: 4th order CIC  (N=4, R=4, M=2) 
  //        for low-pass filtered decimation .
  // 
  // took 356 cycles on an AVR(!) when downsampling
  // took 172 cycles on an AVR(!) when just integrating
  //        (#cycles required on a PIC16 shown further below).
  //    
  //  See article by Bruce Land, Cornell University, locally saved as
  //  C:\literatur\DSP_Andere\CIC_Filter_and_DSP_for_GCC__Cornell_2013.htm ,
  //   and Circuit Cellar, October 2009, page 50 (alias page 52 of the PDF),
  //  C:\literatur\DSP_Andere\Circuit_Cellar_October_2009.pdf .
  //
  //                                  _________       ________
  //            integrator1, 2,3,4   | drop 3  |     |Comb    |
  //  xx ---->(+)------------- ... --|   of 4  |-----|section,|---> yy
  // ('ADRES') |    ______   |       | samples |     |details |    
  //      .    |   |      |  |       |_________|  .  |_below__|
  //      .    '---| Z^-1 |--'                    .
  //      .        |______|                       .
  //     fs_in                                   fs_out = fs_in/4
  //
  // On a PIC16, with F_osc=40 MHz, F_cycle=10 MHz, and F_sample=80 kHz,
  //  there are only 10/0.08 = 125 instruction cycles between two conversions
  //  so it will be tough to run the complete filter 'in time' !
  //  Because the PIC isn't fast enough to calculate all this
  //  in a SINGLE PERIOD of the *INPUT* sampling clock,
  //  the following block must be separated into FOUR BLOCKS
  //  of similar execution time. We wouldn't do that in "C" of course.
  //  To avoid cluttering the original C code (below),
  //  the hand-crafted assembler code is shown further below.
  //
#if( SWI_STANDALONE_SIMULATOR ) // 2016-02-15 : The code originally generated by XC8 
  //  was MUCH too slow.. this would only be ok for a few kHz sampling !
  // 
  // Calculate the four integrators. This code runs at the ADC sampling rate (fs_in).
  integrator1 += xx ;
  // EMU_Truncate24Bit( &integrator1 );
  //
  // Test with xx=1, at THIS point, using 32-bit integers (24 bit example in the PIC assembler version):
  //  integrator1 =   1,  2,   3,   4,   5,   6,   7,   8,    9,   10,  11, ... 100, ...... 1000,
  //  integrator2 =   0,  1,   3,   6,  10,  15,  21,  28,   36,   45,  55,    4950,      499500,
  //  integrator3 =   0,  1,   4,  10,  20,  35,  56,  84,  120,  165, 220,  166650,   166666500,
  //  integrator4 =   0,  1,   5,  15,  35,  70, 126, 210,  330,  495, 715, 4249575, -1199714710,
  //        comb1 =   0,  0,   0,   0,  35,  35,  35,  35,  330,  330, 330, 1091706,  1307521506,
  //        comb2 =   0,  0,   0,   0,  35,  35,  35,  35,  330,  330, 330,  256656,    31331856,
  //        comb3 =   0,  0,   0,   0,  35,  35,  35,  35,  330,  330, 330,   43776,      504576,
  //           yy =   0,  0,   0,   0,   0,   0,   0,   0,    1,    1,   1, ...   1,        1(!),
  //
  if( (integrator1==100) || (integrator1==1000) || (integrator1==10000) )
   {   xx=xx;   // <<< place for a 'conditional breakpoint'
   }
  integrator2 += integrator1;
  // EMU_Truncate24Bit( &integrator2 );
  integrator3 += integrator2;
  // EMU_Truncate24Bit( &integrator3 );
  integrator4 += integrator3;
  // EMU_Truncate24Bit( &integrator4 );


  ++downsample_cnt;
  // Run the 'combs' (Kammfilter) for every 4th input sample.
  // This generates the decimated, low-pass filtered output
  //  (at fs_out = fs_in/4) .
  //
  // ex: if( (downsample_cnt & 0x03) == 0)  // << this would be very slow on a PIC
  if( downsample_cnt & 4 )  // -> single-bit-test, faster on a PIC16 (btfss+goto)
   { downsample_cnt = 0;
     //
     // Here for every FOURTH sample (e.g. at fs_out, not fs_in).
     //
     //.....................................................................
     // Block diagram of the *COMB* section by WB, trying to               .
     //   understand the names and purposes                                .
     //   of the variables used in the code fragment further below,        .
     //   looking for a way to eliminate a few of these variables...       .
     //                                                                    .
     //                                                                    .
     //   --o--   = circuit node (with variable-name shown above)          .
     //     |                                                              .
     //                                                                    .
     //   +                                                                .
     // ->-(+)->- = Subtractor (note the signs of the inputs)              .
     //    -|                                                              .
     //                                                                    .
     //    .-.                                                             .
     //    |D|    = Delay stage (delays for a single sample)               .
     //    '-'                                                             .
     //                                                                    .
     // integrator4       comb1   +      comb2   +      comb3   +     'yy' .
     // ->-o---->---(+)->-o---->---(+)->-o---->---(+)->-o---->---(+)-->--  .
     //    |        -|    |        -|    |        -|    |        -|        .
     //    | .-. .-. |    | .-. .-. |    | .-. .-. |    | .-. .-. |        .
     //    '-|D|-|D|-'    '-|D|-|D|-'    '-|D|-|D|-'    '-|D|-|D|-'        .
     //      '-' '-'        '-' '-'        '-' '-'        '-' '-'          .
     //                                                                    .
     // Names:  ^    ^         ^    ^        ^    ^        ^    ^          .
     //        /|\  /|\       /|\  /|\      /|\  /|\      /|\  /|\         .
     //         |    |         |    |        |    |        |    |          .
     //         |    |         |    |        |    |        |    |          .
     //     last_  last2_   last_ last2_  last_ last2_  last_  last2_      .
     //      integrator4       comb1         comb2         comb3           .
     //                                                                    .
     //.....................................................................
     //
     // The above COMB SECTION is the price to pay for N=4, R=4, M=2 .
     //  ( another price to pay is the passband droop,
     //    but the receiver will compensate it on 'his' side.
     //    At least the PIC16 cannot run the Chebyshev compensator,
     //    which would follow right next to 'yy' in the above schematics.
     //  )
     //
     // First calculate the four 'comb' outputs (yy is the 4th) :
     comb1 = integrator4 - last2_integrator4;
     // EMU_Truncate24Bit( &comb1 );
     comb2 = comb1 - last2_comb1;
     // EMU_Truncate24Bit( &comb2 );
     comb3 = comb2 - last2_comb2;
     // EMU_Truncate24Bit( &comb3 );

#   if(1)
     yy = (int16_t)((comb3 - last2_comb3)>>12) ; // original gain scaling was >>12  (divide by 4096) !
     // Caution: the behaviour of the bitwise shift right operator may be 'implementation specific'.
     //          Borland expanded the sign as one would expect, other compilers didn't .
     // Test with xx=1, at THIS point, using 32-bit integers (24 bit example in the PIC assembler version):
     //  call #      :   1,   2,    3,    4,    5,     6,     7,     8,     9,    10,    11,     12, ...        250,
     //  integrator1 =   4,   8,   12,   16,   20,    24,    28,    32,    36,    40,    44,     48, ...       1000,
     //        comb1 =  35, 330, 1330, 3547, 7490, 13674, 22610, 34810, 50786, 70050, 96114, 126490, ... 1323377930,
     //        comb2 =  35, 330, 1295, 3216, 6160, 10128, 15120, 21136, 28176, 36240, 45328,  55440, ...   31585680,
     //        comb3 =  35, 330, 1260, 2886, 4865,  6912,  8960, 11008, 13056, 15104, 17152,  19200, ...     506624,
     //           yy =   0,   0,   0,     0,    0,     0,     0,     1,     1,     1,     1,      1, ...          1,
     // w. xx=10: yy =   0,   0,   2,     6,    8,     9,     9,    10,    10,    10, ...                 

#   else
     yy = (int16_t)((comb3 - last2_comb3)    ) ;
#   endif
     //
     // Update state .. these are the EIGHT delay elements in the schematics.
     //
     //
     last2_integrator4 = last_integrator4;
     last_integrator4  = integrator4; // <- this cannot be 'postponed' ..
     last2_comb1       = last_comb1;  // <- but these calculations may ..
     last_comb1        = comb1;       // .. be postponed until the next
     last2_comb2       = last_comb2;  //    conversion cycle, to spread
     last_comb2        = comb2;       //    the CPU load more equally
     last2_comb3       = last_comb3;  //    between subsequent interrupts.
     last_comb3        = comb3;       // <- output of the LAST delay line
   } // end if < time to run the 'combs' and to update the CIC 'states' ? >
  // Cycles required by PIC16F1783, using the above C code compiled with 'XC8 free':
  //   FORGET IT ! The "free" edition produces severely bloated code,
  //               it's ok for the initialisation, but not for the filter code.
  //
#else  // Try to do this in PIC16 inline assembler ?
# asm  // Oh well. Using a stupid C compiler (XC8 'free')
       // to write most of this stuff in Assembler...
       // BANKSEL(_xx) // select the RAM-bank with the CIC filter variables, e.g. 'MOVLB 0x01'.
       // (not necessary when already in the right bank - see code above)

  // Calculate the four integrators. This code runs at the ADC sampling rate (fs_in, e.g. 80 kHz).
  // integrator1 += xx ;              //  shown in 'disassembly' |--|
  MOVF   BANKMASK(_xx),            W  // address : 0xA0 & 0x7F = 0x20 in the MOVF operand !
  ADDWF  BANKMASK(_integrator1+0), F  // address : 0xA4 & 0x7F = 0x24 ...
  MOVF   BANKMASK(_xx+1),          W  // address : 0xA1 & 0x7F = 0x20 ..  etc, etc
  ADDWFC BANKMASK(_integrator1+1), F  // address : 0xA5 & 0x7F = 0x25
  MOVLW  0                            // bits 23..16 : shouldn't this be SIGN EXTENDED (0xFF when negative) ??
  ADDWFC BANKMASK(_integrator1+2), F  // address : 0xA6 & 0x7F = 0x26 (bits 23..16, incremented on Carry)
# if( sizeof(cic_t) >= 4 )            // optional, for four-byte integer..
  ADDWFC BANKMASK(_integrator1+3), F  // address : 0xA7 & 0x7F = 0x27 (bits 31..24, optional)
# endif
  //
  // Test with xx=1, at THIS point, using 24-bit integers (32 bit example in the plain C version further above):
  //  integrator1 =   1,  2,   3,   4,   5,   6,   7,   8,    9,   10,  11, ... 100, ...... 1000,
  //  integrator2 =   0,  1,   3,   6,  10,  15,  21,  28,   36,   45,  55,    4950,      499500,
  //  integrator3 =   0,  1,   4,  10,  20,  35,  56,  84,  120,  165, 220,  166650,   166666500,
  //  integrator4 =   0,  1,   5,  15,  35,  70, 126, 210,  330,  495, 715, 4249575, -1199714710,

  // integrator2 += integrator1;
  MOVF   BANKMASK(_integrator1+0), W
  ADDWF  BANKMASK(_integrator2+0), F
  MOVF   BANKMASK(_integrator1+1), W
  ADDWFC BANKMASK(_integrator2+1), F
  MOVF   BANKMASK(_integrator1+2), W
  ADDWFC BANKMASK(_integrator2+2), F
# if( sizeof(cic_t) >= 4 )
  MOVF   BANKMASK(_integrator1+3), W
  ADDWFC BANKMASK(_integrator2+3), F
# endif

  // integrator3 += integrator2;
  MOVF   BANKMASK(_integrator2+0), W
  ADDWF  BANKMASK(_integrator3+0), F
  MOVF   BANKMASK(_integrator2+1), W
  ADDWFC BANKMASK(_integrator3+1), F
  MOVF   BANKMASK(_integrator2+2), W
  ADDWFC BANKMASK(_integrator3+2), F
# if( sizeof(cic_t) >= 4 )
  MOVF   BANKMASK(_integrator2+3), W
  ADDWFC BANKMASK(_integrator3+3), F
# endif

  // integrator4 += integrator3;
  MOVF   BANKMASK(_integrator3+0), W
  ADDWF  BANKMASK(_integrator4+0), F
  MOVF   BANKMASK(_integrator3+1), W
  ADDWFC BANKMASK(_integrator4+1), F
  MOVF   BANKMASK(_integrator3+2), W
  ADDWFC BANKMASK(_integrator4+2), F
# if( sizeof(cic_t) >= 4 )
  MOVF   BANKMASK(_integrator3+3), W
  ADDWFC BANKMASK(_integrator4+3), F
# endif

  // At THIS point, approximately 40 cycles after entering the ISR ...

  // ++downsample_cnt;
  INCF  BANKMASK(_downsample_cnt), F
  // switch( downsample_cnt ) ..
  MOVF  BANKMASK(_downsample_cnt), W
  ADDWF pcl, F   // add downsample_cnt to the current program counter, low byte.
  // No risk due to 8-bit overflow here because the interrupt service handler
  //      is located at a low address (to make the above computed goto possible).
  NOP                   // downsample_cnt = 0 : never occurs after incrementing downsample_cnt !
  GOTO  cic_emit_yy_lo  // downsample_cnt = 1 : emit old output (yy, LSB) to UART
  GOTO  cic_calc_combs2 // downsample_cnt = 2 : calculate the combs, 2nd part
  GOTO  cic_emit_yy_hi  // downsample_cnt = 3 : emit old output (yy, MSB) to UART
cic_calc_combs1: // in THIS 'phase' of downsampling, calculate the combs, 1st part
  // 4th downsampling phase: calculate combs 
  CLRF  BANKMASK(_downsample_cnt)  // back from 4th to 1st downsampling-phase

  // First part of the 'combs' calculation (see C source and schematics above).
  // At this point, t = 43 instruction cycles after entering the ISR.
  //
  // comb1 = integrator4 - last2_integrator4; 'zero' the stopwatch in MPLAB here !
  MOVF   BANKMASK(_last2_integrator4+0),W  // LSByte first.. RIGHT OPERAND for the subtraction 
  SUBWF  BANKMASK(_integrator4+0),W  // "Subtract W from f" ... here, with the destination in W, not 'f' ("file") !
  MOVWF  BANKMASK(_comb1+0)                // store result bits 7..0
  MOVF   BANKMASK(_last2_integrator4+1),W  // middle byte next..
  SUBWFB BANKMASK(_integrator4+1),W
  MOVWF  BANKMASK(_comb1+1)                // store result bits 15..8
  MOVF   BANKMASK(_last2_integrator4+2),W  // higher byte next..
  SUBWFB BANKMASK(_integrator4+2),W  
  MOVWF  BANKMASK(_comb1+2)                // store result bits 23..16
     // -> 24-bit subtraction WITH RESULT IN A THIRD VARIABLE requires 9 cycles.
# if( sizeof(cic_t) >= 4 )            // optional, for four-byte integer..
  MOVF   BANKMASK(_last2_integrator4+3),W  // highest byte last..
  SUBWFB BANKMASK(_integrator4+3),W  
  MOVWF  BANKMASK(_comb1+3)                // store result bits 31..24
# endif

  // comb2 = comb1 - last2_comb1;
  // BSF _CARRY_                     // Carry ~ "nothing borrowed" (for SUBWF) 
  MOVF   BANKMASK(_last2_comb1+0),W  // LSByte first..
  SUBWF  BANKMASK(_comb1+0),W        // note the destination is W, not F here!
  MOVWF  BANKMASK(_comb2+0)          // store result bits 7..0
  MOVF   BANKMASK(_last2_comb1+1),W  // middle byte next..
  SUBWFB BANKMASK(_comb1+1),W
  MOVWF  BANKMASK(_comb2+1)          // store result bits 15..8
  MOVF   BANKMASK(_last2_comb1+2),W  // higher byte next..
  SUBWFB BANKMASK(_comb1+2),W  
  MOVWF  BANKMASK(_comb2+2)          // store result bits 23..16
# if( sizeof(cic_t) >= 4 )           // optional, for four-byte integer..
  MOVF   BANKMASK(_last2_comb1+3),W  // highest byte last..
  SUBWFB BANKMASK(_comb1+3),W  
  MOVWF  BANKMASK(_comb2+3)          // store result bits 31..24
# endif

  // comb3 = comb2 - last2_comb2 :
  MOVF   BANKMASK(_last2_comb2+0),W  // LSByte first..
  SUBWF  BANKMASK(_comb2+0),W        // note the destination is W, not F here!
  MOVWF  BANKMASK(_comb3+0)          // store result bits 7..0
  MOVF   BANKMASK(_last2_comb2+1),W  // middle byte next..
  SUBWFB BANKMASK(_comb2+1),W
  MOVWF  BANKMASK(_comb3+1)          // store result bits 15..8
  MOVF   BANKMASK(_last2_comb2+2),W  // higher byte next..
  SUBWFB BANKMASK(_comb2+2),W  
  MOVWF  BANKMASK(_comb3+2)          // store result bits 23..16
# if( sizeof(cic_t) >= 4 )           // optional, for four-byte integer..
  MOVF   BANKMASK(_last2_comb2+3),W  // highest byte last..
  SUBWFB BANKMASK(_comb2+3),W  
  MOVWF  BANKMASK(_comb3+3)          // store result bits 31..24
# endif

  // yy = (int16_t)((comb3 - last2_comb3)>>12) ;  remember the 'CIC gain' !
  //                          8 of these ^^ 12 shifts are achieved via byte-offset.
  //  Since the original input (from the ADC) was 12 bits wide,
  //  and we will send 16-bit samples via the UART,
  //  don't remove the 'gain' in yy completely.
  //  This may actually give a few more bits of effective resolution.
  // Test with xx=1, at THIS point, using 24-bit integers (32 bit example in the "C" variant further above):
  //  call #      :   1,   2,    3,    4,    5,     6,     7,     8,     9,    10,    11,     12, ...        250,
  //  integrator1 =   4,   8,   12,   16,   20,    24,    28,    32,    36,    40,    44,     48, ...       1000,
  //        comb1 =  35, 330, 1330, 3547, 7490, 13674, 22610, 34810, 50786, 70050, 96114, 126490, ... 1323377930,
  //        comb2 =  35, 330, 1295, 3216, 6160, 10128, 15120, 21136, 28176, 36240, 45328,  55440, ...   31585680,
  //        comb3 =  35, 330, 1260, 2886, 4865,  6912,  8960, 11008, 13056, 15104, 17152,  19200, ...     506624,
  //      yy(*16) =   0,   0,   1,     4,   10,    15,    16,    16,    16,    16,    16,     16, ...          1,
  // w. xx=10: "" =   0,   1,  12,    48,  100,   141,   158,   160,   160,   160, ...
  //    yy remained stable even when the integrators or combs rolled over,
  //       and the signs of the integrators and combs 'flipped like crazy'.
  // After thousands of comb calculations, with xx=2047 (constant): yy = 32752 .
  // After thousands of comb calculations, with xx=4095 (constant): yy = 65520 .
  // After thousands of comb calculations, with xx=2047 (constant): yy = 32752 .
  // After thousands of comb calculations, with xx=2047 (constant): yy = 32752 .
  //
  MOVF   BANKMASK(_last2_comb3+1),W  // LSByte first.. (without comb3 bits 7..0)
  SUBWF  BANKMASK(_comb3+1),W        //  minus , destination in W
  MOVWF  BANKMASK(_yy+0)             // store result bits 7..0
  MOVF   BANKMASK(_last2_comb3+2),W  // middle byte next..
  SUBWFB BANKMASK(_comb3+2),W
  MOVWF  BANKMASK(_yy+1)             // store result bits 15..8
  // To let the result (yy) look nice, the sign could be extended into bits 23..16:
  // But since we never use those bits (yet?), don't waste time with that.
# if( sizeof(cic_t) >= 4 ) // optional, for four-byte integer.. 24 bit out ?
# endif


  // Update 'previous' and 'previous previous' values for the CIC filter's comb section:
  // last2_integrator4 = last_integrator4;
  MOVF   BANKMASK(_last_integrator4+0), W  // load LSB..
  MOVWF  BANKMASK(_last2_integrator4+0)    // store MSB, etc..
  MOVF   BANKMASK(_last_integrator4+1), W
  MOVWF  BANKMASK(_last2_integrator4+1)
  MOVF   BANKMASK(_last_integrator4+2), W
  MOVWF  BANKMASK(_last2_integrator4+2)
# if( sizeof(cic_t) >= 4 ) // optional, for four-byte integer.. 24 bit out ?
  MOVF   BANKMASK(_last_integrator4+3), W
  MOVWF  BANKMASK(_last2_integrator4+3)
# endif

  // last_integrator4  = integrator4;
  MOVF   BANKMASK(_integrator4+0), W  // load LSB..
  MOVWF  BANKMASK(_last_integrator4+0)    // store MSB, etc..
  MOVF   BANKMASK(_integrator4+1), W
  MOVWF  BANKMASK(_last_integrator4+1)
  MOVF   BANKMASK(_integrator4+2), W
  MOVWF  BANKMASK(_last_integrator4+2)
# if( sizeof(cic_t) >= 4 )
  MOVF   BANKMASK(_integrator4+3), W
  MOVWF  BANKMASK(_last_integrator4+3)
# endif

  // At THIS point, the value in 'integrator4' isn't needed anymore,
  //  thus the calculations below (cic_calc_combs2) can be postponed 
  //  until the next A/D conversion cycle(s) .
  // The PIC16 firmware was almost running out of time to spend
  // in the interrupt handler (approx 90 cycles after entering the ISR),
  // so it's time to return from the interrupt here :
  goto cic_exit

  // - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // above: first part of the COMB FILTER calculations .
  // below: second part of the COMB FILTER calculations,
  //        performed in the (n+2) nd interrupt .
  // - - - - - - - - - - - - - - - - - - - - - - - - - - - -

cic_calc_combs2: // calculate the comb filters, 2nd part

  // last2_comb1 = last_comb1;
  MOVF   BANKMASK(_last_comb1+0), W  // load LSB..
  MOVWF  BANKMASK(_last2_comb1+0)    // store MSB, etc..
  MOVF   BANKMASK(_last_comb1+1), W
  MOVWF  BANKMASK(_last2_comb1+1)
  MOVF   BANKMASK(_last_comb1+2), W
  MOVWF  BANKMASK(_last2_comb1+2)
# if( sizeof(cic_t) >= 4 )
  MOVF   BANKMASK(_last_comb1+3), W
  MOVWF  BANKMASK(_last2_comb1+3)
# endif

  // last_comb1  = comb1;
  MOVF   BANKMASK(_comb1+0), W  // load LSB..
  MOVWF  BANKMASK(_last_comb1+0)    // store MSB, etc..
  MOVF   BANKMASK(_comb1+1), W
  MOVWF  BANKMASK(_last_comb1+1)
  MOVF   BANKMASK(_comb1+2), W
  MOVWF  BANKMASK(_last_comb1+2)
# if( sizeof(cic_t) >= 4 )
  MOVF   BANKMASK(_comb1+3), W
  MOVWF  BANKMASK(_last_comb1+3)
# endif

  // last2_comb2 = last_comb2;
  MOVF   BANKMASK(_last_comb2+0), W  // load LSB..
  MOVWF  BANKMASK(_last2_comb2+0)    // store MSB, etc..
  MOVF   BANKMASK(_last_comb2+1), W
  MOVWF  BANKMASK(_last2_comb2+1)
  MOVF   BANKMASK(_last_comb2+2), W
  MOVWF  BANKMASK(_last2_comb2+2)
# if( sizeof(cic_t) >= 4 )
  MOVF   BANKMASK(_last_comb2+3), W
  MOVWF  BANKMASK(_last2_comb2+3)
# endif

  // last_comb2  = comb2;
  MOVF   BANKMASK(_comb2+0), W   // load LSB..
  MOVWF  BANKMASK(_last_comb2+0) // store MSB, etc..
  MOVF   BANKMASK(_comb2+1), W
  MOVWF  BANKMASK(_last_comb2+1)
  MOVF   BANKMASK(_comb2+2), W
  MOVWF  BANKMASK(_last_comb2+2)
# if( sizeof(cic_t) >= 4 )
  MOVF   BANKMASK(_comb2+3), W
  MOVWF  BANKMASK(_last_comb2+3)
# endif

  // At this point (with 24-bit integers), t = 69 instruction cycles...

  // last2_comb3 = last_comb3;
  MOVF   BANKMASK(_last_comb3+0), W  // load LSB..
  MOVWF  BANKMASK(_last2_comb3+0)    // store MSB, etc..
  MOVF   BANKMASK(_last_comb3+1), W
  MOVWF  BANKMASK(_last2_comb3+1)
  MOVF   BANKMASK(_last_comb3+2), W
  MOVWF  BANKMASK(_last2_comb3+2)
# if( sizeof(cic_t) >= 4 )
  MOVF   BANKMASK(_last_comb3+3), W
  MOVWF  BANKMASK(_last2_comb3+3)
# endif

  // last_comb3  = comb3;
  MOVF   BANKMASK(_comb3+0), W    // load LSB..
  MOVWF  BANKMASK(_last_comb3+0)  // store MSB, etc..
  MOVF   BANKMASK(_comb3+1), W
  MOVWF  BANKMASK(_last_comb3+1)
  MOVF   BANKMASK(_comb3+2), W
  MOVWF  BANKMASK(_last_comb3+2)
# if( sizeof(cic_t) >= 4 )
  MOVF   BANKMASK(_comb3+3), W
  MOVWF  BANKMASK(_last_comb3+3)
# endif
  GOTO cic_exit // end if < time to run the 'combs' and to update the CIC 'states' ? >

  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // above: second part of the comb calculations (running at fs_out)
  // below: no CIC calculations but output via UART.
  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

cic_emit_yy_lo: // in THIS phase, emit low-pass filtered output (yy, LSB) to the UART
  // unnecessary: BANKSEL(_yy) // select the RAM-bank with the CIC filter variables
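  // Added 2016-02-21 : The PIC's analog input will be biased by Vref/2.
  // To have a (signed) output of 'zero' despite that (ADRES=2047 +/- noise),
  // subtract 32752(!) from 'yy'. The result is stored back in 'yy'.
  // NOTE(review): the MOVLW operands below use 32767, not 32752. With a
  //   steady-state yy of 32752 for xx=2047 this leaves a residual offset
  //   of -15 -- confirm which of the two values is intended.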
  MOVLW  ( 32767 & 255 )       // Bias value (details above), lower byte
  SUBWF  BANKMASK(_yy+0),F     //  minus , destination in REGISTER ("file")
  MOVLW  ( 32767 >> 8 )        // Bias value, upper byte
  SUBWFB BANKMASK(_yy+1),F     // subtract offset from yy, bits 15..8
  //       The effect can be checked with Spectrum Lab's input monitor (scope).
  //       Select 'Coupling: DC' for that purpose.
  BTFSC   BANKMASK(_fflags),FFLAG_UART_START  // start DAC->UART output ? (ONLY AT THE LSBYTE)
  BSF     BANKMASK(_fflags),FFLAG_UART_ACTIVE //    yes -> activate the output NOW
  BTFSS   BANKMASK(_fflags),FFLAG_UART_ACTIVE // DAC->UART output active ?
  GOTO    cic_exit             //      no -> do NOT send 'yy' to the UART 
  MOVF    BANKMASK(_yy+0), W   // load lowpass filtered, decimated output (LSB)
  BANKSEL         (TX1REG)     // select RAM- and register bank of the UART TX register
  MOVWF   BANKMASK(TX1REG)     // send LSByte to UART
  //
  // When tested with the simulator, got here at t = 46 cycles after entering the ISR 
  //
  GOTO    cic_exit

cic_emit_yy_hi: // in THIS phase, emit low-pass filtered output (yy, MSB) to the UART
  // ex: BANKSEL(_yy)          // select the RAM-bank with the CIC filter variables
  BTFSS   BANKMASK(_fflags),FFLAG_UART_ACTIVE // DAC->UART output active ?
  GOTO    cic_exit             //      no -> do NOT send 'yy' to the UART 
  MOVF    BANKMASK(_yy+1), W   // load lowpass filtered, decimated output (MSB)
  BANKSEL         (TX1REG)     // select RAM- and register bank of the UART TX register
  MOVWF   BANKMASK(TX1REG)     // send MSByte to UART
  //
  // When tested with the simulator, also got here at t = 46 cycles after entering the ISR. 
  // That means, between writing the LSByte (above) and the MSByte (here),
  // exactly TWO 80-kHz interrupts have passed, and the UART had
  // 2/(80kHz) = 25 us to send the previous byte. At 460800 bits/second,
  // and 10 "bits per byte" (including start- and stopbit), the UART requires
  // 10/(460.8kHz) = 21.7 us to send a single byte, thus NO NEED TO WAIT here.
  // GOTO    cic_exit    // don't waste TWO precious cycles (for a 'goto') here


cic_exit:
  // Timing tests with MPLAB (simulator), after zeroing MPLAB's 'stopwatch'
  // with a breakpoint on the entry of ADC_Sampling_ISR(), and another breakpoint
  // on the closing curly brace below ("return from interrupt") :
  //  downsample_cnt 0 -> 1 : spent ~ 53 cycles in the interrupt (integrators and UART output only)
  //  downsample_cnt 1 -> 2 : spent ~ 86 cycles in the interrupt (comb calculations, 2nd part)
  //  downsample_cnt 2 -> 3 : spent ~ 51 cycles in the interrupt (integrators and UART output only)
  //  downsample_cnt 3 -> 0 : spent ~ 94 cycles in the interrupt (comb calculations, 1st part)
  // Confirmed with an oscilloscope and real target hardware,
  //  using the pulse length between IOP_ADCCLK_PIN_HI and IOP_ADCCLK_PIN_LO.
  //          Measured 8.6 us -> Ok (with 80 kHz interrupt frequency ~ 12.5 us).
  //          Enough CPU time remaining for the main loop.
  //          No adverse effect on the GPSDO's phase noise. 
  //          Reasonable image rejection - see spectra in the "html" folder.
# endasm 
#endif // < use the original C code, or hand-crafted PIC assembler for the CIC filter ? >

  IOP_ADCCLK_PIN_LO;    // -> BCF 0x0C, 0x04
  PIR1bits.TMR2IF = 0;  // clear "Timer2 Interrupt Flag" (don't let the interrupt 'pend')

} // end ADC_Sampling_ISR() [the one-and-only interrupt service routine for midrange PICs]



// ... other subroutines which MAY not work properly when crossing a 256-byte boundary go here ...

//---------------------------------------------------------------------------
void ClearMem( uint8_t *pbDest, uint8_t nBytes) // zero-fills a block of memory
  // [in]  pbDest : address of the first byte to clear
  // [in]  nBytes : number of bytes to clear (0..255); zero means "touch nothing"
{
  for( ; nBytes != 0; --nBytes )
   { *pbDest = 0x00;  // wipe the current byte ..
     ++pbDest;        // .. and step to the next one
   }
} // end ClearMem()

//---------------------------------------------------------------------------
void LimitToPlusMinus32000( int16_t *pi16 )
  // Clamps the 16-bit signed value at *pi16 into the range -32000 ... +32000.
  // [in,out] pi16 : value to limit; only written when it is out of range.
{
  // Historical note: XC8 'free' turned the pointer accesses in this function
  // into a lot of assembly (about 20 instructions for a single comparison
  // through the pointer), so the value is fetched only once here.
  int16_t i16Value = *pi16;

  if( i16Value > 32000 )
   {  *pi16 = 32000;
   }
  else if( i16Value < -32000 )
   {  *pi16 = -32000;
   }
} // end LimitToPlusMinus32000()

//---------------------------------------------------------------------------
void LimitYToPlusMinus32k(void)
  // Clamps the global 32-bit value 'i32TempY.i32' into -32767 ... +32767.
  // The value is only written when it lies outside that range.
{
  if( i32TempY.i32 < -32767 )
   {  i32TempY.i32 = -32767;   // below the lower limit -> pin to -32767
   }
  else if( i32TempY.i32 > 32767 )
   {  i32TempY.i32 = 32767;    // above the upper limit -> pin to +32767
   }
  // This could be easily optimized .. first look at the upper 16 bits only,..
} // end LimitYToPlusMinus32k()

//---------------------------------------------------------------------------
void FillString( char *pszDest, uint8_t pattern, uint8_t len)
  // Writes 'len' copies of 'pattern' into pszDest, followed by a trailing zero.
  // The destination must therefore provide room for len+1 characters.
{
  uint8_t i;

  // (the original pointer-increment loop was too complicated for CC5X,
  //  which is why WB ditched that compiler)
  for( i = 0; i < len; ++i )
   { pszDest[i] = (char)pattern;
   }
  pszDest[len] = '\0';   // terminate the string right after the pattern
}

//---------------------------------------------------------------------------
void Plot( char *pszDest, uint8_t pattern, int pos )
 // Plots a single character into a space-filled string :
 // min                                  center                                max|
 // |                                      |                                      |
 // |                                      |                                      |
 // 0         1         2         3         4         5         6         7
 // 01234567890123456789012345678901234567890123456789012345678901234567890123456789
 // <------------------ 39 characters ---->|<------------- 39 characters --------->
 // |                                     "0"
 //
 // [in,out] pszDest : string buffer; must provide at least 79 characters
 //                    (index 0..78) because that is the range written here.
 // [in]     pattern : character to place into the string
 // [in]     pos     : signed position relative to the centre line;
 //                    0 = centre (index 39), negative = left, positive = right.
 //                    Values outside -39..+39 are pinned to the nearest edge.
{
  // Limit 'pos' BEFORE adding the centre offset. Adding 39 first (as older
  // versions did) overflows a signed int for very large 'pos' - with a
  // 16-bit 'int' anything above 32728 wrapped around and was plotted
  // on the LEFT edge instead of the right one.
  if( pos < -39 )
   {  pos = -39;
   }
  if( pos > 39 )
   {  pos = 39;
   }
  pszDest[pos + 39] = pattern;  // 39 = character index of the zero indicator (vertical center line)
}

//---------------------------------------------------------------------------
void wait_10ms(void)
  // Busy-wait delay: loads the global 'wWaitCounter' with a start value
  // and spins in an empty loop until the counter has been counted down.
  // The delay is NOT derived from a timer; it depends entirely on how many
  // instruction cycles the compiler spends per loop iteration.
  // NOTE(review): 10 ms at Fcycle = 10 MHz are 100000 instruction cycles, so
  //   the start value below implies roughly 6.4 cycles per iteration. Any
  //   change of compiler, optimisation level or loop style presumably
  //   requires re-calibrating the constant - verify with a scope or simulator.
{
  wWaitCounter=15625;  // 15625 for Fcycle = 10 MHz, XC8 'free' 

  while( wWaitCounter-- )   // empty body on purpose: the loop itself is the delay
   {  
   }
  // when running on the internal oscillator, the above loop
  //  took 15 seconds instead of 10 ms 
}

//---------------------------------------------------------------------------
void wait_1ms(void)
  // Busy-wait delay: loads the global 'wWaitCounter' with a start value
  // and spins in an empty loop until the counter has been counted down.
  // The delay depends on the number of instruction cycles per loop
  // iteration, not on a hardware timer.
  // NOTE(review): 1 ms at Fcycle = 10 MHz are 10000 instruction cycles, so
  //   the start value below implies roughly 6.4 cycles per iteration;
  //   re-check the constant if the compiler or the loop is changed.
{
  wWaitCounter=1562;  // 1562 for Fcycle = 10 MHz, XC8 'free'
  while( wWaitCounter-- )   // empty body on purpose: the loop itself is the delay
   {  
   }
  // when running on the internal oscillator, the above loop
  //  took 1.5 seconds instead of 1 ms
}

//---------------------------------------------------------------------------
void ResetIntegralsAndLowpass(void) // -> ErrorIntegral = LowpassIn = LowpassOut = 0
  // Zeroes the error integral, the integral of the error integral,
  // and both the input and the output value of the software lowpass.
{
  // the two integrals ..
  i16ErrorIntegral             = 0;
  i32ErrorIntegralIntegral.i32 = 0;

  // .. and the software lowpass (input and output side)
  i16LowpassIn                 = 0;
  i32LowpassOut.i32            = 0;
} // end ResetIntegralsAndLowpass()


//---------------------------------------------------------------------------
void ProcessCapturedSyncPulse(void)
  // [in] CCPR1 : 16-bit hardware timer, captured at the rising edge
  //              of the GPS sync pulse .
  //              Counts the number of OCXO clock cycles (10 MHz) .
  // [out] i16FreqOffset = measured OXCO frequency minus 10 MHz .
  //         Ideally zero. Unit is 'OCXO cycles per second'.
  //         With f_oxco = 10.0x MHz, one step is approximately 100 ns.
  //         The SUMMED UP i16FreqOffset readings, taken over time,
  //         but limited to a certain maximum is later used
  //         to *PHASE-LOCK* the OCVCXO to the GPS reference
  //          ( -> I controller ) .
  //         The MOMENTARY i16FreqOffset reading will steer
  //         the output control voltage, like a *FREQUENCY LOCK* loop
  //          ( -> P controller ) .
  // [out] i16ErrorIntegral = error integral. Details further below.
  //       i32ErrorIntegralIntegral = integral of i16ErrorIntegral.
  // [out] i16LowpassIn = input for the software lowpass, see block diagram.
{

  // any non-brain-damaged C compiler allows THIS:
  i16FreqOffset = (int16_t)CCPR1 - (int16_t)wPrevCapture;
  wPrevCapture  = CCPR1;   // save 16-bit timer capture register for the next time

  // The timer- and capture registers are only 16 bit wide.
  // With a 1-second sync interval, and 10 MHz OCXO, the difference
  // between two readings should be 10000000 (decimal) = 0x00989680 .
  // Only the lower 16 bits are actually read   ->   ideally 0x9680 .
  // After subtracting the above 'ideal' value, the rest should be "almost zero":
  i16FreqOffset -= 0x9680; // -> error value, signed, ideally ZERO,
  // one step = ca. 100 ns error ( = 1 / 10 MHz capture timer input frequency,
  // which IN THIS CASE (PIC16F1783) is , because Ftimer = Fcycle = Fosc/4,
  // Fosc=4*Focxo -> capture timer input frequency = 4 * 10 MHz / 4 = 10 MHz.
  // A 'better' microcontroller where the capture-timer runs at 40 MHz
  // would give 25 ns instead of 100 ns capture resolution.
  // But anyway, we can measure the 10 MHz OCXO frequency with 1 Hz resolution
  // at 1 second gate time, thus the UNIT of 'i16FreqOffset' is simply HERTZ !
  // Verified by measuring 16FreqOffset at various control voltages,
  // VCOCXO made by OFC, labelled "OFC MC598X4 - 010W  10.000000 MHz"
  //                                                           |_ 1Hz ?!
  //  Vctrl =  0 V  ->  10 MHz - 57 Hz  ->  i16FreqOffset = -57
  //  Vctrl =  5 V  ->  10 MHz +  0 Hz  ->  i16FreqOffset = 0
  //  Vctrl = 12 V  ->  10 MHz + 80 Hz  ->  i16FreqOffset = +80 .
  // -> "Electrical" frequency tuning range = -57/10e3 = -5.7e-6 = -5.7 ppm (?!)
  //                                       to +80/10e3 =  8.0e-6 = +8.0 ppm (!?)
  //    A quite large tuning range for a "10.000000 MHz"-VCOCXO !
  //    External circuitry (resistor divider for the Vctrl output)
  //    now reduces the tuning range to +/- 5 Hz .

  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // above: coherent measurement of the VCOCXO frequency error
  // below: implementation of the control loop, during and after warm-up
  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  if(  bSyncPulseCounter < 255 )
   { ++bSyncPulseCounter;
     if( bSyncPulseCounter > 1 ) // the pulses produces garbage readings !
      { 
        if( (i16FreqOffset < -2/*Hz*/) || (i16FreqOffset > 2/*Hz*/) )
         { bSyncPulseCounter = 1;  // frequency still "too far off" -> prolong 'warm-up' phase
         }
        // During the initial 255 seconds (or more if 'the oven is cold'),
        //   use a faster control loop to find the best possible Vctrl 'bias' point .
        LimitToPlusMinus32000( &i16ErrorIntegral );
        // Any non-brain-damaged, real C compiler should have no trouble with this (CC5X didn't understand)
        i16ErrorIntegral += i16FreqOffset; // <- won't overflow at +/- 32767 ...
        i32TempY.i32 = 64/*Ki*/ * i16ErrorIntegral; // fast-settling integrating controller only
        LimitYToPlusMinus32k();
        i16LowpassIn = i32TempY.i16.lo;
        if( bSyncPulseCounter==255 ) // finished coarse measurement -> switch to normal loop controller (further below)
         { // Use the current 'Vctrl' value as the new 'Vctrl bias' value.
           // Under ideal conditions, this will keep the error integral,
           //  and the error-integral-integral ZERO after switching the loop mode:
#         if( C_VCTRL_NEGATIVE_SLOPE )
           // LATER: PSMC1DC = (uint16_t)( i32LowpassOut.i16.hi + u16VctrlBias ); // frequency too high -> INCREASE PWM DC
           u16VctrlBias += i32LowpassOut.i16.hi;
#         else
           // LATER: PSMC1DC = (uint16_t)( u16VctrlBias - i32LowpassOut.i16.hi ); // frequency too high -> DECREASE PWM DC
           u16VctrlBias -= i32LowpassOut.i16.hi;
#         endif
           ResetIntegralsAndLowpass(); // -> ErrorIntegral = LowpassIn = LowpassOut = 0
           bLowpassSpeed = 1;    // switch to the low-speed output lowpass (tau = 107 sec)
#         if( SWI_STANDALONE_SIMULATOR )
           EMU_fSimulationPaused        = TRUE;
#         endif // SWI_STANDALONE_SIMULATOR ?
         } // end if( bSyncPulseCounter==255 )
      }   // end if( bSyncPulseCounter > 2 )
   }     // end if( bSyncPulseCounter < 255 )
  else // 'norma' operation -> use slow, low-noise loop controller.
   { // Accumulate the error signal (Regelabweichung), using a numeric integrator:
     if( (i16FreqOffset >= -SWI_MAX_FREQ_OFFSET_HZ) && (i16FreqOffset <= SWI_MAX_FREQ_OFFSET_HZ) )
      { // Integrate the error value, limited to 16 bit signed :
        LimitToPlusMinus32000( &i16ErrorIntegral );
        i16ErrorIntegral += i16FreqOffset;   // <- won't overflow at +/- 32767
        i32ErrorIntegralIntegral.i32 += i16ErrorIntegral;
        // Beware: two integrators in series, in a loop with NEGATIVE feedback
        //         can easily turn the control loop into an oscillator .
        // To prevent oscillation, extra measures are taken further below.
        //
        // Phase (error) = integral of frequency (error). In this case 'coherent'
        // because the capture-feeding timer keeps running continously (it's not
        // stopped / started or restarted with each new sync pulse).
        // Thus i16ErrorIntegral can be used to steer the VCOCXO (Vctrl)
        // like a *PLL* later (phase locked, not just a frequency locked loop).
        // If each sync pulse would restart the measurement of the frequency offset,
        // there would be a phase slip. This way there is no phase slip,
        // and i16ErrorIntegral keeps track all oscillator cycles *since initial lock* .
        // More details in gpsdo_pic_notes.htm .
        //
        // For absolute phase measurements and coherent PSK experiments,
        // the VCOCXO phase (measured against the GPS signal) shall always
        // 'crawl back' to the original value (of the first lock).
        // Integrator #1 would be sufficient to keep the FREQUENCY error zero
        //               in the long run, but cannot eliminate the PHASE error.
        // Integrator #2 was added to bring down i16ErrorIntegral to zero
        //               (-> removes phase error after sufficiently long time).
        //
        //
        //  Block diagram with FREQUENCY- and PHASE- locking loop
        //  -----------------------------------------------------
        //                                 __________
        //                                |          |
        //                              _\|Integrator|_ ErrorIntegralIntegral
        //                             | /|    #2    | |
        //                             |  |__________|(*) Ki2
        //               ____________  |               |    ____________
        //              |            | |         Ki1   |   | Very 'slow'|
        //  freqOffset--| Integrator |------>----(*)--(+)--| Lowpass    |-->--.
        //     /|\      |     #1     | ErrorIntegral       | (1st order)|     |
        //      |       |____________|                     |____________|     |
        //  ____|____       ________         _______       _____              |
        // | Timer   |     |        |       |analog |     |     | duty cycle  |
        // | Capture |--<--| VCOCXO |---<---|lowpass|--<--| PWM |-<----(-)----'
        // |_________|     |________| Vctrl |_______|     |_____|       |
        //     /|\                                                     /|\   .
        //      | sync pulse                                            |
        //  ( 1 PPS from GPS )                                      VctrlBias
        //
        //
        //
        // Estimation of the control path 'gain' (Regelstrecke):
        //         Input (PWM for Vctrl):  0 ... 65535     (ideally 32767)
        //         Output(freqOffset)   : -5 Hz .. + 5 Hz  (ideally zero)
        //         G_path = 10 Hz / 65536 = approximately 150 uHz per PWM-step
        //
        // Test cases :
        //
        //  (2) Single 'I' controller only (integrating),
        //              can keep the FREQUENCY error zero in the long run
        //                (because an integrator has infinite gain at DC),
        //              but doesn't return to original PHASE:
        //        Ki1 = 256 : loop oscillation period ~ 350 seconds,
        //              Error peaks [Hz]: -1.127 0.510 -0.231 0.105 -0.048 0.023 -0.011 0.002 .
        //              Completely stabilized after ~ 1870 seconds .
        //        Ki1 = 64 : loop oscillation period ~ 754 seconds,
        //              Error peaks [Hz]: -5.000 1.520 -0.262 0.045 -0.008 .
        //              Completely stabilized after ~ 1945 seconds .
        //        Ki1 = 32 : nicely damped loop oscillation period ~ 1270 seconds,
        //              Error peaks [Hz]: -5.000 1.064 -0.053 0.004 .
        //              Completely stabilized after ~ 2000 seconds .
        //              ( close enough to the limit of aperiodic oscillation
        //                = aperiodischer Grenzfall.. what's that in english ?)
        //        Ki1 = 16 : no detectable loop oscillation period,
        //              Error peaks [Hz]: -5.000 0.696  (nothing more, strong damping).
        //              Completely stabilized again after ~2342 seconds,
        //              but more remaining phase error when VCOCXO had to be 'pulled far' !
        //              (no remaining frequency error, but WB also wanted to eliminate
        //               the PHASE error, which seemed to require an additional 'slow loop')
        //
        //  (3) Double 'I' controller with two integrators as in the block diagram,
        //              can bring down the 'phase error' to zero (in the long run),
        //              but needs extra care to avoid oscillation.
        //         Ki2 must be much less than Ki1 (e.g. Ki1=32, Ki2=0.0625) .
        //
#      if(0) // single, simple 'I' controller:
        i32TempY.i32 = 32/*Ki*/ * i16ErrorIntegral; // integrating controller only
#      else  // double integrator:
        // (error integral plus an 'integral of the error integral')
        i32TempY.i32 = 32/*Ki*/ * i16ErrorIntegral    // integrated frequency error (-> phase error)...
               + (i32ErrorIntegralIntegral.i32 >> 4); // integrated phase error (with very low 'gain' to avoid oscillation)
                                               // |___ divide by 2^N, e.g.: Ki2 = 1 / (2^4) = 0.0625
#      endif // < simple or double 'I' controller > ?
            // Tests results with EMU_dblExtraOscErr_Hz = 1 plus 'full swing' step at t_sim = 10 sec,
            //       time constant of the output lowpass still tau = 65536 / 610.3 Hz = 107 sec.
            //    Ki1 | Ki2     | Settling time   | Peak error (frequency offsets),
            //        |         | ( -> *PAUSED* ) |  remarks (overshoot ? etc..)
            //   -----+---------+-----------------+---------------------------
            //    32  | 0.03125 | 8243 s = 2.3hrs | Error peaks [Hz]: -4.000 1.409 -0.301
            //    32  | 0.0625  | 3110 s = 52 min | Error peaks [Hz]: -4.000 1.518 -0.551 0.032  ("author's choice")
            //    32  | 0.125   | 5416 s = 1.5hrs | Error peaks [Hz]: -4.000 1.726 -1.061 0.434 -0.181 0.075 -0.031 0.013
            //    16  | 0.0625  | 9259 s = 2.6hrs | Error peaks [Hz]: -4.000 1.465 -0.944 0.469 -0.233 0.116 -0.058 0.029
            //    64  | 0.0625  | 8995 s = 2.5hr  | Error peaks [Hz]: -4.000 1.665 -0.468 -0.059 (too much phase noise)
            // -> decided to use Ki=32, Ki2=0.0625 (which don't require multiply or divide, only bitwise shifting)
            //
        LimitYToPlusMinus32k();
        i16LowpassIn = i32TempY.i16.lo;
      }
     else // frequency offset is "off the spec" .. oscillator warm-up ?
      { // wait until the oscillator stops drifting !
      }
   } // end if < not the FIRST sync pulse >
} // end ProcessCapturedSyncPulse()

//---------------------------------------------------------------------------
void UpdatePFO(void)
  // Updates the PFO (programmable frequency output) .
  // [in]  EEPROM[ EEP_ADDR_PFO_PERIOD_L, _H, _FFA ] : period and 'fractional' part
  // [out] PSMC2PR; PSMC2DC = PSMC2PR>>1;
  // NOTE: The [in] / [out] list above describes the INTENDED behaviour.
  //       At present the function body contains no statements at all,
  //       only the design notes below, so calling it has no effect.
{
  // PIC16F1782/3 datasheet, Ch. 24.9, p. 220, "Fractional Frequency Adjust (FFA)":
  // ------------------------------------------------------------------------
  //  > FFA is a method by which PWM resolution can be
  //  > improved on 50% fixed duty cycle signals. Higher
  //  > resolution is achieved by altering the PWM period by a
  //  > single count for calculated intervals. This increased
  //  > resolution is based upon the PWM frequency
  //  > averaged over a large number of PWM periods. For
  //  > example, if the period event time is increased by one
  //  > psmc_clk period (TPSMC_CLK) every N events, then
  //  > the effective resolution of the average event period is
  //  > TPSMC_CLK/N. (...)
  //  > The FFA function is only available when using one of the two Fixed Duty Cycle 
  //  > modes of operation. In fixed duty cycle operation each PWM period is comprised
  //  > of two period events. That is why the PWM periods in Table 24-3 example 
  //  > calculations are multiplied by two as opposed to the normal period calculations 
  //  > for normal mode operation.
  //  >
  // W.B.: The primary purpose of the GPSDO's PFO was to deliver a 13.3333333 MHz signal
  //       (multiplied to 66.6666666 MHz for an SDR-IQ), which would be impossible 
  //       with the *basic* formula for the '50 % fixed duty cycle' mode of the PSMC:
  //          f_out    = f_psmc / ( 2 * (PSMCxPR + 1) ) 
  //          f_out_m0 = 40 MHz / ( 2 * (   0    + 1) ) = 20    MHz
  //          f_out_m1 = 40 MHz / ( 2 * (   1    + 1) ) = 10    MHz
  //          f_out_m2 = 40 MHz / ( 2 * (   2    + 1) ) = 6.666 MHz
  //  If the FFA is *not* needed (-> less phase noise) :
  //          f_out    = f_psmc / ( PSMCxPR + 1 )
  //          f_out_13 = 40 MHz / (   2     + 1 ) = 13.3333 MHz ("without" phase noise)
  //  -> If the FFA isn't necessary, don't use it !  

} // end UpdatePFO()



//---------------------------------------------------------------------------
// main function
//---------------------------------------------------------------------------


void main(void)
{

  //-------------------------------------------------------------------------
  // initialize all I/O ports
#if SWI_USE_BINARY_NUMBERS  // unfortunately, some C compilers don't understand these nice binary constants:
  ANSELA = 0b00000011; // Configure RA0+RA1 as analog pins, the rest for digial I/O
  ANSELB = 0b00000000; // Configure all pins of port B as digital I/O (not analog-in)
  APFCON = 0b01000110; // "ALTERNATE PIN FUNCTION CONTROL REGISTER" (page 111) ...
         //  ||||||||______ b0 : CCP2 Input/Output Pin Selection
         //  |||||||              1 = CCP2 is on pin RB3
         //  |||||||              0 = CCP2 is on pin RC1
         //  |||||||_______ b1 : RX Pin Selection
         //  ||||||               1 = RX is on pin RB7 / ICSPDAT   <<<<
         //  ||||||               0 = RX is on pin RC7
         //  ||||||________ b2 : TX Pin Selection
         //  |||||                1 = TX is on pin RB6 / ICSPCLK   <<<<
         //  |||||                0 = TX is on pin RC6
         //  |||||_________ b3 : MSSP Serial Data (SDA/SDI) Output Pin Selection
         //  ||||                 1 = SDA/SDI is on pin RB6
         //  ||||                 0 = SDA/SDI is on pin RC4
         //  ||||__________ b4 : MSSP Serial Clock (SCL/SCK) Pin Selection
         //  |||                  1 = SCL/SCK is on pin RB7
         //  |||                  0 = SCL/SCK is on pin RC3
         //  |||___________ b5 : MSSP SDO Pin Selection
         //  ||                   1 = SDO is on pin RB5
         //  ||                   0 = SDO is on pin RC5
         //  ||____________ b6 : CCP1 Input/Output Pin Selection (here: input for GPS SYNC)
         //  |                    1 = CCP1 is on pin RB0           <<<<
         //  |                    0 = CCP1 is on pin RC2
         //  |_____________ b7 : C2OUT Pin Selection
         //                       1 = C2OUT is on pin RA6
         //                       0 = C2OUT is on pin RA5

  // > SLEW RATE CONTROL  (PIC16F1782/3 datasheet page 112, applies to other ports, too)
  // > The SLRCONA register controls the slew rate option for each port pin.
  // > Slew rate control is independently selectable for each port pin.
  // > When an SLRCONA bit is set, the corresponding port pin drive is
  // > slew rate limited. When an SLRCONA bit is cleared, the corresponding
  // > port pin drive slews at the maximum rate possible.
  // WB: The RESET state of the SLRCONx bits is H="slow" (limited slew rate),
  //     which caused a 13.3333 MHz output (from the PSMC) to reach
  //     ONE VOLT ONLY instead of 3.5 V !  Thus, turn the slew-rate-limiters OFF.
  //     After that, the rise- and fall times on RC6 / PSMC2A were below 5 ns.
  SLRCONA = 0b00000000;  // slew rate control for RA7..RA0 : L="fast", H="slow" (!)
  SLRCONB = 0b00000000;  // slew rate control for RB7..RB0 : L="fast", H="slow" (!)
  SLRCONC = 0b00000000;  // slew rate control for RC7..RC0 : L="fast", H="slow" (!)
  // Port Direction registers. Microchip calls them "tristate" registers:
  TRISA  = PORTA_DIRECTIONS; // initialise port A directions... beware, TRISA is in another bank .
           // The C compiler should have inserted something like "MOVLB 0x1" here, which SWITCHES THE REGISTER BANK in a PIC16F1xxx !
  PORTA  = PORTA_INIT_DATA ; // initial data for port A
  //
  TRISB  = PORTB_DIRECTIONS; // initialise port B ...
  PORTB  = PORTB_INIT_DATA ; // initial data for port B
  TRISC  = PORTC_DIRECTIONS; // initialise port C ...
  PORTC  = PORTC_INIT_DATA ; // initial data for port C
  // XC8 produced a 3-instruction sequence for the above line, to switch the register bank:
  // 0x7F6: MOVLW 0x7
  // 0x7F7: MOVLB 0x0        // "Move literal to low nibble in BSR", in simple words: "switch the register bank"
  // 0x7F8: MOVWF PORTC
  // -> bsf IOP_PLL_EN  ; idle state for 'ENable' = H = no serial transfer
#endif // ( SWI_USE_BINARY_NUMBERS ) .. only required to compile the 'real' firmware, not supported by Borland C



  //-------------------------------------------------------------------------
  // Initialize hi-res PWM output (to steer the OCVCXO and produce the SDR clock).
  //    The PWM output can be 'amplified' by an external CMOS gate if necessary,
  //    which *must* be fed with a noise-free reference voltage .
  //    Duty cycle resolution is over 16 bits (due to the PSMC, details below).
  //    Fortunately the PSMC (in a PIC16F178x) is not limited to Fosc/4 .
  //    But unfortunately Timer1 and -being connected to the same timer-
  //    the Capture/Compare units (CCP1+CCP2) still are limited to Fosc/4 .. sigh.
  //
  //  "PSMC" (Programmable Switch Mode Control, PIC16F1783 has TWO of them, x=1 or 2.
  //         Here: PSMC1 used as simple 16-bit PWM for the OCVCXO control voltage,
  //               PSMC2 used to generate a clock for the external SDR or "VFO".
  //
  //         "PxCSRC"         "PxCPRE"
  //     bits 1..0 |     bits 1..0 |
  //              \|/             \|/      16-bit counter
  //              ___            _____       __________
  // PSMCxCLK ->-|   |  40MHz ? |     |     |          |
  // "64 MHz" ->-|MUX|---->-----| / 1 |-->--| PSMCxTMR |---
  // Fosc (*) ->-|___|          |_____|     |__________|   |
  //                                        CLR /|\        |
  //  (*) Fosc, not Fosc/4 !           "period   |         |
  //                                    event"   |        \|/
  //                    _________                |         |
  //                   |PSMCxPR  |-->--compare --          |
  //                   |_________|       /|\               |
  //                   ("period")         |________________|
  //                                                       |
  //                    _________                    _     |
  //                   |PSMCxPH  |-->--compare --> _/  out |  (-> rising edge of PWM output,
  //                   |_________|       /|\               |      PIC16F1783 : PSMC1A = RC0,
  //                   ("phase")          |________________|      pin 11 of 28-pin "SPDIP" )
  //                    _________                  _       |
  //                   |PSMCxDC  |-->--compare -->  \_ out |  (-> falling edge of PWM output)
  //                   |_________|       /|\               |
  //                   ("duty cycle")     |________________|
  //
  // Details about the following register configurations
  //     in the PIC16F1782/3 datasheet (DS40001579E), page 245,
  //     "Table 24-5: SUMMARY OF REGISTERS ASSOCIATED WITH PSMC".
  //  Each register in Table 24-5 is even LINKED to the relevant details (pages).
  // Fortunately, the C compiler (XC8) eliminates the daunting task of
  // selecting the correct register banks.
  // That's the purpose of 'MOVLB' (similar to BANKSEL in the old days).
#if SWI_USE_BINARY_NUMBERS
  PSMC1CON  = 0b00000000;  // PSMC1 CONTROL REGISTER ..
  PSMC2CON  = 0b00000000;  // PSMC2 CONTROL REGISTER ..
            //  |   |__|___ b3..0 : 0000 = single PWM waveform generation
            //  |___________ b7 : L=PSMC module disabled
  PSMC1MDL  = 0b00000000;  // MODULATION CONTROL REGISTER ..
  PSMC2MDL  = 0b00000000;  // .. same for the 2nd PSMC
            //  |   |__|___ b3..0 : 0000 = "Modulation Source is PxMDLBIT"
            //  |___________ b7 : L = "Periodic Modulation Mode Enabled"
  PSMC1SYNC = 0b00000000;  // SYNCHRONIZATION CONTROL REGISTER
  PSMC2SYNC = 0b00000000;  // .. same for the 2nd PSMC
            //         \|___ b1..0 : 00 = "PSMC is sync'd with PERIOD event"
            //                       10 = PSMC1 is synchronized with the PSMC2 module
  PSMC1CLK  = 0b00000000;  // PSMC CLOCK CONTROL REGISTER
  PSMC2CLK  = 0b00000000;  // .. same for the 2nd PSMC
            //    \|  \|____ b1..0 : 00 = Fosc system clock (here: 40 MHz, "slightly overclocked")
            //     |                 01 = "64 MHz" clock in from PLL
            //     |________ b5..3 : 00 = prescaler divide by ONE
  PSMC1POL  = 0b00000000;  // PSMC POLARITY CONTROL REGISTER. 0 = active-high
  PSMC2POL  = 0b00000000;
  PSMC1BLNK = 0b00000000;  // PSMC BLANKING CONTROL REGISTER. 0 = no blanking
  PSMC2BLNK = 0b00000000;
  PSMC1REBS = 0b00000000;  // PSMC RISING EDGE BLANKED SOURCE REGISTER. here: no blanking input
  PSMC2REBS = 0b00000000;
  PSMC1FEBS = 0b00000000;  // PSMC FALLING EDGE BLANKED SOURCE REGISTER. here: no blanking....
  PSMC2FEBS = 0b00000000;
  PSMC1PHS  = 0b00000001;  // PSMC PHASE SOURCE REGISTER
  PSMC2PHS  = 0b00000001;
            //  |   ||||_____ b0 : P1PHST: PSMCx Rising Edge Event on Time Base match ?
            //  .   ...            H = Rising edge event will occur when PSMCxTMR = PSMCxPH
            //                     L = time base will not cause rising edge event
  PSMC1DCS  = 0b00000001;  // PSMC DUTY CYCLE SOURCE REGISTER
  PSMC2DCS  = 0b00000001;
            //  | ...  |_____ b0 : H="Falling edge event will occur when PSMCxTMR = PSMCxDC"
            //  |                     (guess this is what we need for entirely 'DC'-register controlled PWM-DutyCycle)
            //  |                  L= Time base will not cause falling edge event
            //  |____________ b7 : H="falling edge event will occur when PCMCxIN pin goes true"
            //                     L="PSMCxIN pin will not cause falling edge event"
  PSMC1PRS  = 0b00000001;  // PSMC PERIOD SOURCE REGISTER
  PSMC2PRS  = 0b00000001;
            //  | ...  |_____ b0 : H="Period event will occur and PSMCxTMR will reset when PSMCxTMR = PSMCxPR"
            //  |                     (guess this is what we need for 'PR'-register controlled PWM-frequency)
            //  |                  L="Time base will not cause period event"
            //  |____________ b7 : H="Period event will occur and PSMCxTMR will reset when PSMCxIN pin goes true"
            //                     L="PSMCxIN pin will not cause period event"
#endif // ( SWI_USE_BINARY_NUMBERS )
            // PWM period = ( PSMCxPR[15..0] + 1 ) / Fpsmc_clk .
            //  Examples:  (65535+1) / 40 MHz = 1.384 ms =  1 / 610.351564   Hz
            //             (  84 +1) / 40 MHz = 2.125 us =  1 / 470.5882353 kHz
            //             (  83 +1) / 40 MHz = 2.100 us =  1 / 476.1904762 kHz
            //             (   2 +1) / 40 MHz =    75 ns =  1 / 13.33333333 MHz
  PSMC1PR   = 0xFFFF;      // PSMC PERIOD COUNT REGISTER ("C" allows 16 bit access)
#if ( EEPROM_SIZE > 0 )
  PSMC2PRL  = EEPROM_READ( EEP_ADDR_PFO_PERIOD_L ); // PSMC PERIOD COUNT REGISTER (here: 'programmable frequency output')
  PSMC2PRH  = EEPROM_READ( EEP_ADDR_PFO_PERIOD_H ); // upper 8 bits of the " " " "
#else
  PSMC2PR   = 0x00002;     // hard-coded output frequency (13.3333 MHz for SDR-IQ) 
#endif
            // PWM duty cycle (DC) to 50 % initially :
  PSMC1DC   = 0x7FFF;      // PSMC 1 DUTY CYCLE COUNT REGISTER (16 bit access)
  PSMC2DC   = PSMC2PR>>1;  // PSMC 2 DUTY CYCLE COUNT REGISTER (16 bit access)
            // Note (about reprogramming the DutyCycle register later):
            // > The 16-bit duty cycle value is double-buffered
            // > before it is presented to the 16-bit time base for comparison.
            // > The buffered registers are updated on the first period
            // > event Reset after the PSMCxLD bit of the PSMCxCON
            // > register is set.
            //
            // rising edge of PWM output at the BEGIN of each cycle (0=no phase offset) :
  PSMC1PHH  = 0x00;        // PSMC PHASE COUNT HIGH BYTE REGISTER
  PSMC2PHH  = 0x00;
  PSMC1PHL  = 0x00;        // PSMC PHASE COUNT LOW BYTE REGISTER
  PSMC2PHL  = 0x00;
#if SWI_USE_BINARY_NUMBERS
            // dead-band, auto-shutdown, etc: not required yet :
  PSMC1ASDC = 0b00000000;  // PSMC AUTO-SHUTDOWN CONTROL REGISTER (0=no shutdown)
  PSMC2ASDC = 0b00000000;
  PSMC1ASDL = 0b00000000;  // PSMC AUTO-SHUTDOWN OUTPUT LEVEL REGISTER
  PSMC2ASDL = 0b00000000;
  PSMC1ASDS = 0b00000000;  // PSMC AUTO-SHUTDOWN SOURCE REGISTER
  PSMC2ASDS = 0b00000000;
#endif // ( SWI_USE_BINARY_NUMBERS )
  PSMC1DBR  = 0x00;        // PSMC RISING EDGE DEAD-BAND TIME REGISTER
  PSMC2DBR  = 0x00;        // (number of clock periods in rising edge "dead band")
  PSMC1DBF  = 0x00;        // PSMC FALLING EDGE DEAD-BAND TIME REGISTER (..)
  PSMC2DBF  = 0x00;        // (unfortunately 8 bits only, otherwise this could be used
                           //  to generate a square-wave multi-phase output...)
#if SWI_USE_BINARY_NUMBERS
  PSMC1FFA  = 0b00000000;  // PSMC FRACTIONAL FREQUENCY ADJUST REGISTER
            //      |__|__ b3..0 : number of clock periods to add each period event time.
            //                     The fractional time period is 1 / (16*psmc_clk) .
            // (Note: this fractional division causes phase noise.
            //        We don't need if for PSMC1, which drives the Vctrl PWM)
#endif // SWI_USE_BINARY_NUMBERS ?
  // The 2nd PSMC is used as the 'programmable frequency output', 
  // so use the register value from the EEPROM for the 'fractional frequency adjust':
#if ( EEPROM_SIZE > 0 )
  bTemp = EEPROM_READ( EEP_ADDR_PFO_FFA ); // bit 5: FFA-'enable', bits 3..0: FFA-'value'
  PSMC2FFA  = bTemp & 0x0F; // 0..15 "PSMC clocks" (details on the FFA in 'SetPFO()' )
#else
  PSMC2FFA  = 0x00;        // no 'fractional frequency adjust' 
#endif
  PSMC1BLKR = 0x00;        // PSMC RISING EDGE BLANKING TIME REGISTER
  PSMC2BLKR = 0x00;
            // > BLKR = Rising Edge Blanking Time bits
            //   = "Unsigned number of PSMCx psmc_clk clock periods in rising edge blanking"
  PSMC1BLKF = 0x00;        // PSMC FALLING EDGE BLANKING TIME REGISTER
  PSMC2BLKF = 0x00;
#if SWI_USE_BINARY_NUMBERS
  PSMC1STR0 = 0b00000001;  // PSMC STEERING CONTROL REGISTER 0
  PSMC2STR0 = 0b00000001;
            //  ||||||||______ b0 : PWM Steering PSMCxA Output Enable, mode dependent, too complex to be explained here..
            //  nc|||||_______ b1 : PWM Steering PSMCxB Output Enable bit
            //    ||||________ b2 : PWM Steering PSMCxC Output Enable bit
            //    |||_________ b3 : PWM Steering PSMCxD Output Enable bit
            //    ||__________ b4 : PWM Steering PSMCxE Output Enable bit
            //    |___________ b5 : PWM Steering PSMCxF Output Enable bit .
            // For MOST bits in ..STR0: H = "Single PWM output is active on pin PSMCx"
            // See PIC16F178x datasheet, page 245, "Table 24-5: SUMMARY OF REGISTERS ASSOCIATED WITH PSMC".
  PSMC1STR1 = 0b00000000;  // PSMC STEERING CONTROL REGISTER 1
  PSMC2STR1 = 0b00000000;
            //  |     ||______ b0 : 3-Phase Steering High Side Modulation Enable bit
            //  |     |_______ b1 : 3-Phase Steering Low Side Modulation Enable bit
            //  |_____________ b7 : PWM Steering Synchronization bit. 0 = "immediately"
  PSMC1INT  = 0b00000000;  // PSMC TIME BASE INTERRUPT CONTROL REGISTER ..
  PSMC2INT  = 0b00000000;  // (counter overflow interrupt enable bit, etc etc, 0 = disabled)
  PSMC1OEN  = 0b00000001;  // PSMC OUTPUT ENABLE CONTROL REGISTER ..
  PSMC2OEN  = 0b00000001;  // .. similar for the 2nd PSMC, with output on PSMC2A / RC6
            //        ||______ b0 : PWM Output Enable for output 'A'
            //        |             H = PWM output is active on PSMCx output y pin
            //        .             L = PWM output is not active, normal port functions in control..
            // (bit 0 controls "Output A", bit 1 "B", bit 2 "C", etc etc.
            //  Note the similar-sounding purpose of the ..STR0 register ! )
  PSMC1CON  = 0b11000000;  // PSMC CONTROL REGISTER ..
  PSMC2CON  = 0b11000000;
            //  ||  |__|___ b3..0 : 0000 = single PWM waveform generation
            //  ||_________ b6 : PSMCxLD: PSMC Load Buffer Enable bit
            //  |                H= "PSMCx registers are ready to be updated" (here, they ARE ready from the software's point of view)
            //  |                L= "PSMCx buffer update complete" (details below)
            //  |__________ b7 : H=PSMC module enabled
            // From the PIC16F1782/3 datasheet, page 222, 24.10.3 "Module Enabled Updates" :
            // > The sequence for loading the buffer registers when the
            // > PSMC module is enabled is as follows:
            // > 1. Software updates all registers.
            // > 2. Software sets the PSMCxLD bit.
            // > 3. Hardware updates all buffers on the next period event.
            // > 4. Hardware clears PSMCxLD bit.
            // WB: At THIS POINT (during init), the PSCMC was *NOT* enabled on entry,
            //     but it cannot hurt setting the "Load Buffer Enabled" bit despite that:
            // Almost done.. configure the PSMC1 & PSMC2 interrupts :
  PIE4     &= 0b11001100;  // PERIPHERAL INTERRUPT ENABLE REGISTER 4
            //    ||  ||_____ b0 : "PSMC1SIE", L=PSMC1 auto-shutdown interrupt disabled
            //    ||  |______ b1 : "PSMC2SIE", L=PSMC2 auto-shutdown interrupt disabled
            //    ||_________ b4 : "PSMC1TIE", L=PSMC1 time base interrupts disabled
            //    |__________ b5 : "PSMC2TIE", L=PSMC2 time base interrupts disabled
  // At this point, both "PSMC"s should produce simple PWM on their 'A' outputs.
#endif // SWI_USE_BINARY_NUMBERS ?



  IOP_ADCCLK_PIN_HI; // test (RA3?)
  NOP();             // "Read-modify-write sequence on the same PORT may fail" (they won't, but thanks for the warning, CC5X)
  IOP_ADCCLK_PIN_LO;



  //-------------------------------------------------------------------------
  // Initialize the PIC's Capture/Compare unit to 'time' the GPS sync period.
  //   As it turned out later, the maximum timer input frequency
  //   for the PIC16F178x's CAPTURE/COMPARE unit is still limited
  //   to Fosc/4, same as the timers in stone-age PICs. Aaargh !
  // (WB decided to "slightly overclock" the PIC with the 10 MHZ OCXO input,
  //  multiplied by four with the internal PLL, to achieve Fosc=40 MHz,
  //  instead of the "32 MHz maximum clock" for the PIC16(L)F1783 .
  //  -> capture timer clock frequency = Fosc/4 = 40 MHz / 4
  //   -> 100 ns TRUE CAPTURE RESOLUTION
  //        (still not even close to the "12.5 ns"-story told by the datasheet,
  //         but better than 1 / ( 10 MHz / 4 ) = 400 ns capture resolution,
  //         which may cause significant phase noise if the control loop
  //         needs to be faster than expected. )
  // From the PIC16F1782/3 datasheet, DS40001579E page 247 :
  // > Capture mode makes use of the 16-bit Timer1 resource.
  // > When an event occurs on the CCPx pin, the 16-bit CCPRxH:CCPRxL
  // > register pair captures and stores the 16-bit value of the TMR1H:TMR1L
  // > register pair, respectively.  (...)
  // > Note: Clocking Timer1 from the system clock (Fosc)
  // >       should not be used in Capture mode.
  // >       In order for Capture mode to recognize the trigger event
  // >       on the CCPx pin, Timer1 must be clocked from the
  // >       instruction clock (Fosc/4) .
  //   WB : Aaaargh ! Would LOVE to have Timer1 running at Fosc = 40 MHz,
  //        but that wouldn't do any good anyway, because (on page 176) :
  // > When the Fosc internal clock source is selected, the Timer1 register
  // > value will increment by four counts every instruction clock cycle .
  //   WB : Aaaaaargh ! Farewell, capture resolution in the sub-100-ns-range !
  // Anyway, initialize Timer1 first, as described in DS40001579E page 247 .
  // This is trivial because Timer1 doesn't even have a 'reload', aka 'period'-
  // register.. it simply increments from 0x0000 to 0xFFFF,
  //            from where it overflows to 0x0000, i.e. one period lasts 65536 'Fosc/4' cycles:
  // 10 MHz / 65536 = 152.5878906 Hz = 1 / 6.5536 milliseconds .
  //    (may be useful to drive the multiplexed 7-segment display one day..) .
  // With the one-pulse-per-second GPS sync signal, Timer1 will overflow
  // 152 or 153 times between two sync pulses, but that's no problem...
  // see Timer1 interrupt handler .
  TMR1   = 0x0000;       // Start counting at zero (for what it's worth)
                         // TMR1 = 16-bit combination of TMR1H + TMR1L .
  PIE1bits.TMR1GIE = 0;  // disable "Timer1 Gate Interrupt"     (b7)
  PIE1bits.TMR1IE  = 0;  //  and    "Timer1 Overflow Interrupt" (b0)
#if SWI_USE_BINARY_NUMBERS
  T1CON  = 0b00000001;   // TIMER1 CONTROL REGISTER
         //  ||||||||______ b0 : TMR1ON : 1 = Timer1 enabled
         //  |||||||_______ b1 : unused
         //  ||||||________ b2 : T1SYNC : 0 = do not synchronize async clock input
         //  |||||_________ b3 : T1OSCEN: 0 = dedicated Timer1 oscillator circuit disabled
         //  ||\|__________ b5..4 : T1CKPS (prescaler) : 00bin = divide by ONE
         //  \|____________ b7..6 : TMR1CS (clk source): 00bin = Fosc/4 (01=Fosc is USELESS)
  T1GCON = 0b00000000;   // TIMER1 GATE CONTROL REGISTER
         //  ||||||\|______ b1..0 : Timer1 Gate Source Select bit (doesn't matter here)
         //  ||||||________ b2 : Timer1 Gate Current State bit
         //  |||||_________ b3 : Timer1 Gate Single-Pulse Acquisition Status bit
         //  ||||__________ b4 : Timer1 Gate Single-Pulse Mode bit
         //  |||___________ b5 : Timer1 Gate Toggle Mode bit
         //  ||____________ b6 : Timer1 Gate Polarity bit (doesn't matter here)
         //  |_____________ b7 : Timer1 Gate Enable bit : 0 = Timer1 counts regardless of gate
#endif // SWI_USE_BINARY_NUMBERS ?
  // Above: Preparation of Timer1 for the purpose of INPUT CAPTURE.
  // Below: Initialisation of the INPUT CAPTURE itself...
  //  - *Each* RISING edge of the GPS sync pulse on CCP1/RB0
  //                       shall copy TMR1H:TMR1L (16 bit)
  //     into the capture register,  CCPR1H:CCPR1L .
  APFCONbits.CCP1SEL = 1;  // CCP1 Input/Output Pin Selection :
                           //  1 = CCP1 is on pin RB0
                           //  0 = CCP1 is on pin RC2
  PIE1bits.CCP1IE    = 0;  // disable interrupt from Capture/Compare Module #1
#if SWI_USE_BINARY_NUMBERS
  CCP1CON= 0b00000101;     // CCP1 Control Register [DS40001579E page 255]
         //  ||\|\__|______ b3..0 : Capture/Compare 1 Mode Select Bits
         //  .. |                   0101 = Capture mode: every rising edge
         //  nc |__________ b5..4 : PWM Duty Cycle least significant bits. Ignored in Capture mode.
#endif // SWI_USE_BINARY_NUMBERS ?
  PIR1bits.CCP1IF    = 0;  // clear Capture/Compare 1 interrupt flag
  // At this point, each rising edge on the GPS sync pulse (one pulse per second)
  // should be captured as a 16-bit value in CCPR1H:CCPR1L .
  // Whenever new capture was made, the PIC will also set CCP1IF ("interrupt flag").
  // But to use as few interrupts as possible (we may need interrupts for the ADC later),
  // the CCP1IF-flag does not fire an interrupt. Instead, it's polled in the main loop
  // which is no problem because this only happens once per second.

  UART_Init();  // initialize the UART (serial port for testing, control, and ADC->PC)
  // The serial data line, presented to the PC's RS-232 'RXD',
  // should be IDLE now. On a true RS-232, an IDLE line has *negative*(!)
  // voltage, which for historic reasons is called 'MARK STATE' (1) !
 
  ADC_Init();   // initialize the A/D converter (but don't start the ADC interrupt yet)
  // Details about the ADC in the PIC16F1782/3 datasheet, DS40001579E,
  // pages 141.., Ch. 17.1, "ADC Configuration" .
  // Note: ADC-associated port pins have already been initialized, e.g. RA0/AN0,
  //       including the TRIS and ANSEL settings (see gpsdo_pic_main.c) .

#ifndef __BORLANDC__
  // To avoid sending garbage with the wrong bitrate,
  //  check if the PIC runs with the intended clock source. Details further below.
  wait_1ms();     // waits 1 ms with the correct clock source, but 1.5 sec
  // if the external clock is missing, and the PIC runs in "Oscillator Fail Mode" !
  // If all works well, the above 1-ms delay looks like a "long stop bit"
  // to the remote receiver.
  if( PIR2bits.OSFIF ) // OSFIF: "Oscillator Fail Interrupt Flag" (H when pending)
   {  PIR2bits.OSFIF = 0; // clear the "Oscillator Fail Interrupt Flag"
      RESET();    // reset the whole device. Maybe it restarts with the EXTERNAL osc...
   } // end if < FSCM error trip >
#if(0) // TEST for the serial port's output polarity and timing:
                       //            _   _   _   _   _   _logic "0" !
  UART_SendChar(0x55); // __________| |_| |_| |_| |_| |________________________________
                       //            S 0 1 2 3 4 5 6 7 (stop bit = idle = logic '1' !! )
                       //           |<---- 78 us ---->|
#endif // TEST ?
  wait_1ms();
#endif // ndef __BORLANDC__ ?

  UART_SendString("\r\nDL4YHF GPSDO V1.1\r\n"); // use strings economically .. this is a PIC with very small CODE memory !

  IOP_ADCCLK_PIN_HI; // test (RA3?)
  NOP();             // "Read-modify-write sequence on the same PORT may fail" (they won't, but..)
  IOP_ADCCLK_PIN_LO;


  // Init application variables, etc..
  ClearMem( (uint8_t*)&xx, 20*cic_s );  // clear everything that belongs to the CIC filter
  downsample_cnt    = 0;  // XC8 does clear global variables automatically (presumably to save ROM)
  bSyncPulseCounter = 0;  // haven't seen a complete GPS sync cycle yet !
  i16FreqOffset     = 0;  // frequency offset (measured OXCO minus 10 MHz) not measured yet
  ResetIntegralsAndLowpass(); // -> ErrorIntegral = LowpassIn = LowpassOut = 0
  bLowpassSpeed     = 8;  // use a 'fast' lowpass before the Vctrl output during initialisation
  u16VctrlBias      = 32767; // begin with ideal pulse width modulator duty cycle : 50 percent
  cDebugMode = 0;  // 0 (zero) : do NOT send 'debug messages' via UART (*)
                   // 'n'    : numeric output (once every GPS pulse)
                   // 'p'    : plotter (crude ASCII output, but ok to check the damping)
  fflags = (1< 23.2 Timer2 Interrupt
  // > Timer2 can also generate an optional device interrupt.
  // > The Timer2 output signal (TMR2-to-PR2 match)
  // > provides the input for the 4-bit counter/postscaler. This
  // > counter generates the TMR2 match interrupt flag which
  // > is latched in TMR2IF of the PIR1 register. The interrupt
  // > is enabled by setting the TMR2 Match Interrupt Enable
  // > bit, TMR2IE, of the PIE1 register 
  // Note: Like many other 'on chip peripherals' (except the PSMC),
  //       Timer2 isn't fed with the OSC frequency (here: 40 MHz)
  //       but with the instruction cycle frequency (here: 10 MHz) !
  PR2  = 199; // interrupt frequency := 10 MHz / (PR2+1) .. examples below.
  // PR2=199 : fs_in=50000.0 Hz, fs_out=12500.0 Hz, quite a waste of bandwidth but
  //        with "only" 12500 samples * 2 bytes to send over the UART, and 500 kBit/second,
  //        there is enough space for another byte (i.e. a 3-byte "sample frame") :
  //        12500 Hz * 3 * 12 bit = 450 kBit/second . 
  //        The third byte can be used for frame sync, AND (maybe in future) 
  //        to pass on the NMEA stream from the GPS receiver to the PC .
  // PR2=124 : fs=80kHz, 1/ 80 kHz = 12.5 us = 15.6 "T_AD" cycles, but too fast for the ADC with 12 bit/sample.
  //           Even with only 10 bit / sample, there was still jitter on the S&H input
  //           due to the non-synchronizable ADC clock prescaler !
  // PR2=135 : fs=73529.41176 Hz -> 17.0 "T_AD" cycles, no jitter, but what an ugly frequency !
  // PR2=149 : fs_in=66666.66666 Hz, fs_out = 16.6666 kHz (easy to remember but..)
  //     BUT: The sampling interval isn't an integer multiple of the ADC clock
  //          (after the div-by-32 prescaler) -> sampling point observed as the spike
  //          on the analog input (AN0/RA0) showed a 600 ns jitter again !
  //          Obviously, setting the "GO"-bit in ADCON0 does not synchronize the
  //          ADC's internal clock prescaler (which we need for T_AD=0.8us),
  //          causing jitter in the real sampling time .
  //     -> Sampling intervals must be integer multiples of T_AD = 0.8 us, 
  //          furthermore T_sample must be at least 17 (not 15!) * T_AD (for 12 bit),
  //          i.e.:   1 / (17*0.8us) = 73529.41176 Hz, 1 / (18*0.8us) = 69444.4444 Hz,
  //                  1 / (19*0.8us) = 65789.47368 Hz, 1 / (20*0.8us) = 62500.0000 Hz,
  //                  1 / (21*0.8us) = 59523.80952 Hz, 1 / (22*0.8us) = 56818.1818 Hz,
  //                  1 / (23*0.8us) = 54347.82609 Hz, 1 / (24*0.8us) = 52083.3333 Hz,
  //                  1 / (25*0.8us) = 50000.00000 Hz
  //        DL4YHF decided to use fs_in = 50 kHz, which is a pity because
  //        the PIC16F1783 would be fast enough to run the CIC filter at fs_in = 80 kHz.
  //        
#if SWI_USE_BINARY_NUMBERS
  T2CON  = 0b00000100;
      //     ||||||\|_ b1..0 = "T2CKPS"  : Timer2 Clock Prescaler, 0=1:1 (don't divide)
      //     ||||||___ b2    = "TMR2ON"  : 1 = timer2 on, 0=timer 2 off
      //     |\__|____ b6..3 = "T2OUTPS" : Output Postscaler, 0=1:1 (don't divide)
      //     |________ n.c.
#endif      
  // Enable Timer2 interrupt (to read and process the analog input)
  PIE1bits.TMR2IE = 1; // DS40001579E page 80 : "Timer2 Interrupt Enable"
  INTCONbits.PEIE = 1; // DS40001579E page 79 : "Peripheral Interrupt Enable"
  INTCONbits.GIE  = 1; // DS40001579E page 79 : "Global Interrupt Enable"


  while(1) // end less main loop .... only left when oscillator-fault causes a RESET
   {
 //  IOP_RED_LED_ON;  // Length of this pulse: 100 ns (with Fcyc = 40 MHz/4 = 10 MHz),
 //  IOP_RED_LED_OFF; // PIC16F1783 'slightly overclocked' (Fcyc max = 32 MHz / 4 = 8 MHz).

     //-------------------------------------------------------------------------
     // Minimalistic 'command handler', controlled by single characters
     // received from the serial port (the PIC's "EUSART") :
     bTemp = UART_ReadChar();
     if( bTemp ) // successfully read another character from the RX-FIFO...
      {
        // Life would be easy if not only the polarity of the TXD-OUTPUT,
        //      but also the polarity of the PIC'S RXD-INPUT could be inverted.
        // Then we could get away without the usual RS-232 level converter,
        // like the once-famous MAX232 which *inverts* the signal.
        // Crude fix: Fix the garbled received character 
        //  (with an EEEXTRA long startbit, which was in fact the IDLE line
        //   state, which has the same voltage as a stopbit). 
        // Example: Someone has sent 0x55 (upper case letter 'U').
        //          Waveform seen with an o-scope on the RXD line :
        //            _   _   _   _   _   _logic "0" ! 
        // __________| |_| |_| |_| |_| |________________________________
        //            S 0 1 2 3 4 5 6 7 (stop bit = idle = logic '1' !! )
        // 
        //          Signal 'seen by the PIC's EUSART' :
        // __________   _   _   _   _   ________________________________
        // .        .|_| |_| |_| |_| |_|
        // .......... 0 1 2 3 4 5 6 7 |
        //      .     | |             |_ one stopbit, no idle time, but next startbit
        //      .     | |_ first bit with 'variable content'.
        // "erroneous |    
        //  data,     |__ "looks" like the 1st data bit
        //  no stopbit"   but was in fact the REAL start bit .
#      if( RXD_INVERT_POLARITY )   // invert the polarity of received data by software ?
        bTemp = UART_InvertRcvdChar( bTemp );
#      endif // RXD_INVERT_POLARITY ?
        if( bTemp != 'a' )
         { fflags = 0; // stop sending filtered analog input to the UART
         }
        if( fflags == 0 ) // only when the UART isn't occupied by the filter-output:
         { UART_SendString("rx=");
           UART_SendChar( bTemp );
           UART_SendChar( '=' );
           UART_SendDecimal( bTemp );
           UART_SendCrNl();
         }
        // Above: just a simple "echo test" for the serial port .
        // Below: command "interpreter" ...
        switch( bTemp )
         { case 'a' :  // start analog input
              cDebugMode = 0;  // .. UART isn't available for debug-output anymore
              fflags |= (1< ErrorIntegral = LowpassIn = LowpassOut = 0
              break;
           case 'z' :  // 'zero' Vctrl (jumps to the minimum frequency)
              ResetIntegralsAndLowpass();
              i32LowpassOut.i16.hi = -32767; // almost 0 % duty cycle
              break;
           case 'm' :  // 'max' Vctrl (jumps to the maximum frequency; same as the "automatic" test initiated by pic_emulator.c)
              ResetIntegralsAndLowpass();
              i32LowpassOut.i16.hi = 32767;  // almost 100 % duty cycle
              break;

           default  : break;  // unknown test command
         }
      } // end if( UART_CheckForRx() )


     //-------------------------------------------------------------------------
     // If a GPS sync pulse has captured the 16-bit 10 MHz timer value,
     // process it HERE (in the main loop, to avoid disturbing the ADC sampling interrupt)
     if( PIR1bits.CCP1IF )
      {  PIR1bits.CCP1IF = 0;  // clear Capture/Compare 1 interrupt flag
        ProcessCapturedSyncPulse(); // -> measure number of timer ticks since last GPS sync pulse

        switch ( cDebugMode )
         { case 'n' : // numeric display of frequency error, error integral, etc.. in numeric form
              if( bSyncPulseCounter < 255 ) // still in the coarse 'init' phase ?
               { UART_SendString( "t=" );  // error signal  (Regelabweichung in Hertz)
                 UART_SendDecimal( bSyncPulseCounter );
                 UART_SendChar( ' ' );
               }
              UART_SendString( "df=" );  // error signal  (Regelabweichung in Hertz)
              UART_SendDecimal( i16FreqOffset );
              UART_SendString( " i1=" ); // error integral (Fehlerintegral)
              UART_SendDecimal( i16ErrorIntegral );
              UART_SendString( " i2=" ); // integral of the error integral
              UART_SendDecimal( (i32ErrorIntegralIntegral.i32 >> 4) );
              UART_SendString( " lpi=" ); // lowpass input (Stellgroesse ohne Offset, ideal 0)
              UART_SendDecimal( i16LowpassIn );
              UART_SendString( " lpo=" ); // lowpass output (tiefpassgefilterte Stellgroesse ohne Offset, ideal 0)
              UART_SendDecimal( i32LowpassOut.i16.hi );
              UART_SendString( " pwm=" ); // Vctrl output (pulse width modulator register value, including bias)
              UART_SendDecimal( (int16_t)PSMC1DC - 32767 );
              UART_SendString( " xx=" ); // Vctrl output (pulse width modulator register value, including bias)
              UART_SendDecimal( (int16_t)xx );
              UART_SendString( " yy=" ); // Vctrl output (pulse width modulator register value, including bias)
              UART_SendDecimal( (int16_t)yy );
              UART_SendCrNl();
              break;
           case 'p' : // plot frequency error, error integral, etc.. in a kind-of DIAGRAM
              FillString( sz80Temp, ' ', 80 );
              sz80Temp[0]  = '|';  // marker for the lower endstop
              sz80Temp[39] = '|';  // marker for the center ("0")
              sz80Temp[78] = '|';  // marker for the upper endstop
              Plot( sz80Temp, 'e', i16FreqOffset    ); // ideally "all zero"
              Plot( sz80Temp, 'i', i16ErrorIntegral % 40 );
              Plot( sz80Temp, 'o', i32LowpassOut.i16.hi % 40 );
              UART_SendString( sz80Temp );
              UART_SendCrNl();
              break;
         } // end switch ( cDebugMode )
      } // end if( PIR1bits.CCP1IF ) [new sync pulse captured, happens once per second]

     //-------------------------------------------------------------------------
     // Update the low-pass filtered Vctrl output, approx 610.3 times per second,
     //        to keep the phase noise as low as possible .
     //        Equivalent analog circuit :
     //                         ____
     //    i16LowpassIn  O-----|____|----*----O  i32LowpassOut.i16.hi
     //                          R       |
     //                    (100 kOhm)  __|__
     //                                _____
     //                          C       |
     //                       (1070 uF) _|_
     //
     //               R * C (time constant) = 65536 / f_sample [610.3 Hz]
     //                                     = 107 seconds .
     //              -3 dB corner frequency = 1 / (2*pi*R*C) = 1.5 mHz .
     //
     //
     // To keep the interrupt free for 'other purposes', this happens in the
     // main loop, but synchronized by the 610.3 HZ PWM timer (40 MHz / 65536).
     // Doing this IN SOFTWARE (instead of using a bulky ANALOG RC lowpass)
     // is more flexible, and the filter's time constant (tau ~ RC) can be
     // adjusted via software if necessary (even lower phase noise, etc).
     //      Details about how to poll for the PWM (here: PSMC) 'period event'
     //      are in the PIC16F1782/3 datasheet (DS40001579E), page 194 :
     // > The match (PSMCxTMR = PSMCxPR) will generate a period match interrupt,
     // > thereby setting the PxTPRIF bit of the PSMC Time Base Interrupt Control
     // > PSMCxINT) register (..)
     if( PSMC1INTbits.P1TPRIF ) // "PSMC 1 Time Base Period Interrupt Flag" set ?
      { // here approximately 610 times each second... if the main loop is fast enough
        PSMC1INTbits.P1TPRIF = 0;  // clear PSMC 1 period interrupt flag
        IOP_DEBUG_PIN1_HI;  // TEST: Is the digital filter 'fast enough' for f_Sample=610 Hz ?
        // Update the first order lowpass filter (with 0 dB DC gain).
        // Implementation :   y[k]  =  (1-alpha)*x[k]   +  (alpha)*y[k-1]
        // Simplified filter constant (alpha, close to 1.0) :
        //                    tau_samples = 1 / (1-alpha)
        //                    alpha = 1 - (1/tau_samples)
        //                    tau [seconds] = tau_samples * T_Sample
        // Examples: alpha = 0.9 gives a time constant of 10 samples,
        //           alpha = (1-1/256)   = 0.996      for tau = 256 samples,
        //           alpha = (1-1/65536) = 0.99998474 for tau = 65536 samples .
        // To avoid floating point maths:
        //           Instead of dividing by 65536, use the following 4-byte
        //           C-union (i32LowpassOut) in memory:
        //    ______________________
        //   |     |     |    |     |
        //   | b3  | b2  | b1 |  b0 |
        //   |_____|_____|____|_____|
        //               .          .
        //   |<---------i32-------->| ('accumulator' for the digital lowpass.
        //   .           .          .   i16LowpassIn is added to THIS part.)
        //   .           |<-.i16.l->| (fractional part for tau=65536 samples)
        //   |<--.i16.h->| (delivers the 'accu' DIVIDED by 65536)
        //
        // To realize variable RC time constants,
        //    simply run the lowpass-algorithm 1 to 8 times in a loop:
        for( bTemp=0; bTemp INCREASE PWM DC
#      else
        PSMC1DC = (uint16_t)( u16VctrlBias - i32LowpassOut.i16.hi ); // frequency too high -> DECREASE PWM DC
#      endif
        // -> PSMC 1 DUTY CYCLE COUNT REGISTER (16 bit access)
        // From the PIC16F1782/3 datasheet, page 222, 24.10.3 "Module Enabled Updates" :
        // > When the PSMC module is enabled (PSMCxEN = 1),
        // > the PSMCxLD bit of the PSMC Control (PSMCxCON) register must be used.
        // > When the PSMCxLD bit is set, the transfer from the
        // > register to the buffer occurs on the next period event. [here: 610 times each second]
        // > The PSMCxLD bit is automatically cleared by hardware
        // > after the transfer to the buffers is complete.
        PSMC1CONbits.PSMC1LD = 1;  // see details above ! (transfer from double-buffered PSMC1DC)

      } // end if < PSMC 1 period interrupt flag > ? [happens 610 times per second]

     //-------------------------------------------------------------------------
     // Sync pulse indicator and software PWM for the RGB indicator LED .
     //    Don't waste precious time in the interrupt for this !
     //    The following code is also executed 610 times per second,
     //    synchronized by a hardware timer, but not occupying the interrupt.
     if( IOP_GPS_SYNC_ACTIVE ) // GPS sync pulse currently active:
      { // Green flashes along with the sync pulse when "ok",
        // Red   flashes : momentary frequency is TOO HIGH,
        // Blue  flashes : momentary frequency is TOO LOW .
        if( i16FreqOffset      > 2/*Hz*/ )
         { IOP_RED_LED_ON;
           IOP_GREEN_LED_OFF;
           IOP_BLUE_LED_OFF;
         }
        else if( i16FreqOffset < -2/*Hz*/ )
         { IOP_RED_LED_OFF;
           IOP_GREEN_LED_OFF;
           IOP_BLUE_LED_ON;
         }
        else // momentary frequency error ZERO : green flash
         {   // (the error integral may still be large, to return to the original phase)
           IOP_RED_LED_OFF;
           IOP_GREEN_LED_ON;
           IOP_BLUE_LED_OFF;
         }
      }
     else  // time between two pulses : software PWM showing error integral (with polarity).
      { IOP_GREEN_LED_OFF;
        if( i16ErrorIntegral >= 0 )  // positive error integral : RED
         { IOP_BLUE_LED_OFF;
           if( i16ErrorIntegral > 255 )
            { IOP_RED_LED_ON;
            }
           else // i16ErrorIntegral <= 255
            { if( i16ErrorIntegral > PSMC1TMRH )
               { IOP_RED_LED_ON;
               }
              else
               { IOP_RED_LED_OFF;
               }
            }
         }
        else // NEGATIVE error integral : BLUE (also value-dependent intensity)
         { IOP_RED_LED_OFF;
           if( i16ErrorIntegral <= -255 )
            { IOP_BLUE_LED_ON;
            }
           else // i16ErrorIntegral between -1 and -254
            {
              if( (-i16ErrorIntegral) > PSMC1TMRH )
               { IOP_BLUE_LED_ON;
               }
              else
               { IOP_BLUE_LED_OFF;
               }
            }
         }
      } // end else < between two GPS sync pulses >

     //-------------------------------------------------------------------------
     // Check the oscillator source. Sometimes a PIC16F1783 ran much slower
     // than expected, despite correct settings in CONFIG1 ("ECH") + CONFIG2 ("4x PLL").
     // Reason: The OCXO was a 'slow starter', and when turning on the OCXO
     // and the PIC at the same time (same supply voltage), the PIC started running
     // with the internal oscillator after the Fail-Safe Clock Monitor (FSCM)
     // detected the absence of an external clock after approximately 2 ms !
     //
     // > When the external clock fails, the FSCM switches the device clock
     // > to an internal clock source  and sets the bit flag OSFIF of the
     // > PIR2 register. Setting this flag will generate an interrupt if the
     // > OSFIE bit of the PIE2 register is also set. The device firmware can
     // > then take steps to mitigate the problems that may arise from a
     // > failed clock. The system clock will continue to be sourced from
     // > the internal clock source until the device firmware successfully
     // > restarts the external oscillator and switches back to external operation.
     // ... this is exactly what happened during 'breadboard testing' .
     //     To avoid this, check if the CPU runs from the *intended* source:
     if( PIR2bits.OSFIF ) // OSFIF: "Oscillator Fail Interrupt Flag" (H when pending)
      { IOP_RED_LED_ON;  // There's a problem with the oscillator .. FSCM switched to internal RC osc ?
        // > The OSFIF bit should be cleared prior to switching to the
        // > external clock source. If the Fail-Safe condition still exists,
        // > the OSFIF flag will again become set by hardware.
        PIR2bits.OSFIF = 0; // clear the "Oscillator Fail Interrupt Flag"
        RESET();    // reset the whole device. Maybe it restarts with the EXTERNAL osc...
      } // end if < FSCM error trip > ?

#   if( SWI_STANDALONE_SIMULATOR )
     if( (!EMU_fKeyboardControlled) && (i16LowpassIn == i32LowpassOut.i16.hi) && ( EMU_i64Tsim_ns > 20000000000)  // min. 20 seconds ...
#     if (SWI_CONTROLLER_PRINCIPLE==CONTROLLER_PRINCIPLE_II )
        && (i16ErrorIntegral==0)
#     endif
       )
      { EMU_fSimulationPaused = TRUE; // guess the transient is over, and the loop has completely settled
        // (the custom 'emulator environment' will pause the simulation and show the results somewhere)
      }
     PIC_Emulator();
#   endif // SWI_STANDALONE_SIMULATOR ?

   } // end of the 'endless' main loop

} // end main()


#ifdef __BORLANDC__
void EMU_StartTest(void)
  // Simulator-only step test (compiled for Borland C++ builds only).
  // Performs the same two steps as the manual 'm' command in main():
  //   1. reset the control loop state,
  //   2. force the lowpass output to its positive maximum.
  // According to the original note, pic_emulator.c calls this after a few
  // 'simulated' seconds and then analyses the loop's response.
{
  // Keep this call FIRST: per the comment at its use in main(), it zeroes
  // ErrorIntegral, LowpassIn and LowpassOut, so the value written below
  // would be wiped out if the order were swapped.
  ResetIntegralsAndLowpass();

  // Upper 16 bits of the lowpass accumulator := +32767,
  // i.e. jump to almost 100 % PWM duty cycle.
  i32LowpassOut.i16.hi = 32767;
} // end EMU_StartTest()
#endif // def __BORLANDC__ ?


/*
EOF ( gpsdo_pic_main.c ) */