aboutsummaryrefslogblamecommitdiffstats
path: root/lib/xmerl/src/xmerl_sax_parser_base.erlsrc
blob: d38045f2a55808cadb1bb27c9d961e114e814e51 (plain) (tree)
1
2
3
4
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
2985
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
2998
2999
3000
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
3121
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284
3285
3286
3287
3288
3289
3290
3291
3292
3293
3294
3295
3296
3297
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
3348
3349
3350
3351
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
3446
3447
3448
3449
3450
3451
3452
3453
3454
3455
3456
3457
3458
3459
3460
3461
3462
3463
3464
3465
3466
3467
3468
3469
3470
3471
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
3486
3487
3488
3489
3490
3491
3492
3493
3494
3495
3496
3497
3498
3499
3500
3501
3502
3503
3504
3505
3506
3507
3508
3509
3510
3511
3512
3513
3514
3515
3516
3517
3518
3519
3520
3521
3522
3523
3524
3525
3526
3527
3528
3529
3530
3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
3542
3543
3544
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593
3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613
3614
3615
3616
3617


                   
                                                        




































                                                                         


              
















































































































                                                                                                 









                                                  






































                                                                                                                                           

                                            
















                                                                                 











                                                   




















































































































































































































































































                                                                                                                     



                                                   



























                                                                                             













                                                    
                                  

                                                               












                                                                        





                                                  





























                                                                                            





                                                    
































































































                                                                                             

                                                             














































                                                                                                           
                                                                                      



















































































































































                                                                                        

                                                           

                                                                                                    
                                                                                    

                                                                                                      
                                                                                    

                                                                                                     
                                                                                    

                                                                                                     
                                                                                    











                                                                                                        






                                                                                                  


                                                                                           
                                                                      
                                      
                                                                         
                                                             








                                                                                                           





















































































                                                                                                        



                                                                


                                                                                       



                                                                  























































                                                                                                                    






                                                                                                 







                                                                                  



                                                                                                   






























































































                                                                                                  

                                                             





















































































































































                                                                                                         
                                                                    

































































































































































































































































































































                                                                                                             









                                                           




































































                                                                                                                                      









                                                                       


                                                                 









                                                                       






















































































                                                                                                            







                                                                                                  























                                                                                        











                                                         



                                                             











                                                         




                                                                







                                                        



                                                            













                                                          


                                                              

                                                    





































































                                                                                          
                                                                 





                                                                                 




                                                                   


































































































































































































                                                                                                       
















                                                                

                                                   












                                                         

                                                  








                                                         

                                      
                                                                              





































































































































































                                                                                                      
                                     




























                                                                                                         
                                                                                               















                                                                                                          








                                                                                                             

               
                                                                         
                                      







                                                                                                        















                                                                                 







                                                   






















































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































                                                                                                                               
%%-*-erlang-*-
%% %CopyrightBegin%
%% 
%% Copyright Ericsson AB 2008-2011. All Rights Reserved.
%% 
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
%% compliance with the License. You should have received a copy of the
%% Erlang Public License along with this software. If not, it can be
%% retrieved online at http://www.erlang.org/.
%% 
%% Software distributed under the License is distributed on an "AS IS"
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
%% the License for the specific language governing rights and limitations
%% under the License.
%% 
%% %CopyrightEnd%
%%----------------------------------------------------------------------
%% Start of common source
%%----------------------------------------------------------------------
%-compile(export_all).

%%----------------------------------------------------------------------
%% Include files
%%----------------------------------------------------------------------
-include("xmerl_sax_parser.hrl").

%%----------------------------------------------------------------------
%% External exports
%%----------------------------------------------------------------------
-export([
	 parse/2,
	 parse_dtd/2,
	 is_name_char/1,
	 is_name_start/1
        ]).

%%----------------------------------------------------------------------
%% Internal exports
%%----------------------------------------------------------------------
-export([
	 cf/3,
	 cf/4,
	 cf/5
        ]).

%%----------------------------------------------------------------------
%% Records
%%----------------------------------------------------------------------

%%----------------------------------------------------------------------
%% Macros
%%----------------------------------------------------------------------
-define(HTTP_DEF_PORT, 80).

%%======================================================================
%% External functions
%%======================================================================
%%----------------------------------------------------------------------
%% Function: parse(Xml, State) -> Result
%% Input:    Xml = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = {ok, EventState, Rest} | FormattedError
%%           EventState = term()
%%           Rest = string() | binary()
%%           FormattedError = whatever format_error/3 returns
%% Description: Parses an XML document from the input stream.
%%              A private ETS table for entity references is created for
%%              the duration of the call and deleted on every exit path.
%%              startDocument is reported before parsing and endDocument
%%              afterwards, also when parsing fails. Any result that is
%%              not recognised here is re-thrown to the caller.
%%----------------------------------------------------------------------
parse(Xml, State) ->
    EntityRefs = ets:new(xmerl_sax_entity_refs, [private]),

    StartState = event_callback(startDocument, State),

    %% The record update is evaluated inside the catch on purpose, so a
    %% bad state term ends up in the catch-all clause below.
    case catch parse_document(Xml, StartState#xmerl_sax_parser_state{ref_table = EntityRefs}) of
        {ok, Remaining, DocState} ->
            EndState = event_callback(endDocument, DocState),
            ets:delete(EntityRefs),
            {ok, EndState#xmerl_sax_parser_state.event_state, Remaining};
        {fatal_error, {ErrState, Reason}} ->
            EndState = event_callback(endDocument, ErrState),
            ets:delete(EntityRefs),
            format_error(fatal_error, EndState, Reason);
        {event_receiver_error, ErrState, {Tag, Reason}} ->
            EndState = event_callback(endDocument, ErrState),
            ets:delete(EntityRefs),
            format_error(Tag, EndState, Reason);
        Unexpected ->
            %% Only the state from before parsing is available here.
            _Ignored = event_callback(endDocument, StartState),
            ets:delete(EntityRefs),
            throw(Unexpected)
    end.

%%----------------------------------------------------------------------
%% Function: parse_dtd(Xml, State) -> Result
%% Input:    Xml = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = {ok, EventState, Rest} | FormattedError
%%           EventState = term()
%%           Rest = string() | binary()
%%           FormattedError = whatever format_error/3 returns
%% Description: Parses an XML DTD from the input stream.
%%              A private ETS table for entity references is created for
%%              the duration of the call and is deleted on every exit
%%              path. startDocument is reported before parsing and
%%              endDocument afterwards, also when parsing fails. Any
%%              result that is not recognised here is re-thrown.
%%----------------------------------------------------------------------
parse_dtd(Xml, State) ->
    RefTable = ets:new(xmerl_sax_entity_refs, [private]),

    State1 = event_callback(startDocument, State),

    case catch parse_external_entity_1(Xml, State1#xmerl_sax_parser_state{ref_table=RefTable}) of
        {fatal_error, {State2, Reason}} ->
            State3 = event_callback(endDocument, State2),
            ets:delete(RefTable),
            format_error(fatal_error, State3, Reason);
        {event_receiver_error, State2, {Tag, Reason}} ->
            State3 = event_callback(endDocument, State2),
            %% The table must be released here as well; otherwise every
            %% event receiver error leaves a table behind in the caller.
            ets:delete(RefTable),
            format_error(Tag, State3, Reason);
        {Rest, State2} when is_record(State2, xmerl_sax_parser_state) ->
            State3 = event_callback(endDocument, State2),
            ets:delete(RefTable),
            {ok, State3#xmerl_sax_parser_state.event_state, Rest};
        Other ->
            %% Only the state from before parsing is available here.
            _State2 = event_callback(endDocument, State1),
            ets:delete(RefTable),
            throw(Other)
    end.


%%======================================================================
%% Internal functions
%%======================================================================

%%----------------------------------------------------------------------
%% Function: parse_document(Rest, State) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = {ok, Rest, State}
%% Description: Parses an XML document.
%%              [1] document ::= prolog element Misc*
%%              The input is first handed to parse_xml_decl/2; whatever
%%              that leaves unconsumed is passed to parse_misc/3.
%%----------------------------------------------------------------------
parse_document(Xml, #xmerl_sax_parser_state{} = State) ->
    {AfterDecl, DeclState} = parse_xml_decl(Xml, State),
    {Remaining, FinalState} = parse_misc(AfterDecl, DeclState, true),
    {ok, Remaining, FinalState}.


%%----------------------------------------------------------------------
%% Function: parse_xml_decl(Rest, State) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = {Rest, State}
%% Description: Entry point for the prolog; looks for the XML declaration.
%%             [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
%%             [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
%%             Empty input, a partial byte order mark and every proper
%%             prefix of "<?xml" are handed to cf/3 together with this
%%             function as the continuation. Input that does not start
%%             with "<?xml" goes straight to parse_prolog/2.
%%----------------------------------------------------------------------
parse_xml_decl(?STRING_EMPTY = Input, State) ->
    cf(Input, State, fun parse_xml_decl/2);
parse_xml_decl(?BYTE_ORDER_MARK_1 = Input, State) ->
    cf(Input, State, fun parse_xml_decl/2);
parse_xml_decl(?BYTE_ORDER_MARK_2 = Input, State) ->
    cf(Input, State, fun parse_xml_decl/2);
parse_xml_decl(?BYTE_ORDER_MARK_REST(AfterBom), State) ->
    %% Complete byte order mark: continue with what follows it.
    cf(AfterBom, State, fun parse_xml_decl/2);
parse_xml_decl(?STRING("<") = Partial, State) ->
    cf(Partial, State, fun parse_xml_decl/2);
parse_xml_decl(?STRING("<?") = Partial, State) ->
    cf(Partial, State, fun parse_xml_decl/2);
parse_xml_decl(?STRING("<?x") = Partial, State) ->
    cf(Partial, State, fun parse_xml_decl/2);
parse_xml_decl(?STRING("<?xm") = Partial, State) ->
    cf(Partial, State, fun parse_xml_decl/2);
parse_xml_decl(?STRING("<?xml") = Partial, State) ->
    cf(Partial, State, fun parse_xml_decl/2);
parse_xml_decl(?STRING_REST("<?xml", AfterTarget), State) ->
    parse_xml_decl_1(AfterTarget, State);
parse_xml_decl(Input, #xmerl_sax_parser_state{encoding = Encoding} = State)
  when is_binary(Input) ->
    %% Binary input that matched none of the clauses above: check that it
    %% decodes in the configured encoding before parsing the prolog.
    case unicode:characters_to_list(Input, Encoding) of
        {incomplete, _, _} ->
            cf(Input, State, fun parse_xml_decl/2);
        {error, _Decoded, _Undecoded} ->
            ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Encoding])));
        _ ->
            parse_prolog(Input, State)
    end;
parse_xml_decl(Input, State) ->
    parse_prolog(Input, State).
    

%% Continues after the literal "<?xml" has been consumed.
%% Whitespace next means this really is the XML declaration; any other
%% character means "<?xml" was only the start of something else, so it is
%% put back in front of the input and the prolog parser takes over.
parse_xml_decl_1(?STRING_UNBOUND_REST(Char, Tail), State) when ?is_whitespace(Char) ->
    %% The XML declaration should not be reported as a processing
    %% instruction, so the parsed attributes are dropped here.
    {_XmlAttributes, AfterDecl, DeclState} = parse_version_info(Tail, State, []),
    parse_prolog(AfterDecl, DeclState);
parse_xml_decl_1(?STRING_UNBOUND_REST(_Char, _) = Input, State) ->
    parse_prolog(?STRING_REST("<?xml", Input), State);
parse_xml_decl_1(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_xml_decl_1/2], undefined).



%%----------------------------------------------------------------------
%% Function: parse_prolog(Rest, State) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = {Rest, State}
%% Description: Parses the XML prolog.
%%             [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
%%             Dispatches on what follows: "<?" is parsed by parse_pi/2,
%%             "<!" by parse_prolog_1/2, any other "<" is handed to
%%             parse_stag/2, and leading whitespace is skipped.
%%             The clause order matters: the longer "<?" and "<!"
%%             prefixes must be tried before the bare "<".
%%----------------------------------------------------------------------
parse_prolog(?STRING_EMPTY = Input, State) ->
    cf(Input, State, fun parse_prolog/2);
parse_prolog(?STRING("<") = Partial, State) ->
    %% A lone "<" cannot be classified yet.
    cf(Partial, State, fun parse_prolog/2);
parse_prolog(?STRING_REST("<?", AfterOpen), State) ->
    {AfterPi, PiState} = parse_pi(AfterOpen, State),
    parse_prolog(AfterPi, PiState);
parse_prolog(?STRING_REST("<!", AfterOpen), State) ->
    parse_prolog_1(AfterOpen, State);
parse_prolog(?STRING_REST("<", AfterOpen), State) ->
    parse_stag(AfterOpen, State);
parse_prolog(?STRING_UNBOUND_REST(Char, _) = Input, State) when ?is_whitespace(Char) ->
    {_Blank, AfterWs, WsState} = whitespace(Input, State, []),
    parse_prolog(AfterWs, WsState);
parse_prolog(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_prolog/2],
                             "expecting < or whitespace").


%% Continues after "<!" has been consumed in the prolog.
%% Accepts either a document type declaration ("DOCTYPE") or a comment
%% ("--"). Empty input and every proper prefix of those two keywords are
%% handed to cf/3 with this function as the continuation.
parse_prolog_1(?STRING_EMPTY = Input, State) ->
    cf(Input, State, fun parse_prolog_1/2);
parse_prolog_1(?STRING("D") = Partial, State) ->
    cf(Partial, State, fun parse_prolog_1/2);
parse_prolog_1(?STRING("DO") = Partial, State) ->
    cf(Partial, State, fun parse_prolog_1/2);
parse_prolog_1(?STRING("DOC") = Partial, State) ->
    cf(Partial, State, fun parse_prolog_1/2);
parse_prolog_1(?STRING("DOCT") = Partial, State) ->
    cf(Partial, State, fun parse_prolog_1/2);
parse_prolog_1(?STRING("DOCTY") = Partial, State) ->
    cf(Partial, State, fun parse_prolog_1/2);
parse_prolog_1(?STRING("DOCTYP") = Partial, State) ->
    cf(Partial, State, fun parse_prolog_1/2);
parse_prolog_1(?STRING_REST("DOCTYPE", AfterKeyword), State) ->
    {AfterDoctype, DoctypeState} = parse_doctype(AfterKeyword, State),
    %% endDTD is reported once the whole declaration has been parsed.
    EndDtdState = event_callback(endDTD, DoctypeState),
    parse_prolog(AfterDoctype, EndDtdState);
parse_prolog_1(?STRING("-") = Partial, State) ->
    cf(Partial, State, fun parse_prolog_1/2);
parse_prolog_1(?STRING_REST("--", AfterDashes), State) ->
    {AfterComment, CommentState} = parse_comment(AfterDashes, State, []),
    parse_prolog(AfterComment, CommentState);
parse_prolog_1(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_prolog_1/2],
                             "expecting comment or DOCTYPE").
    


%%----------------------------------------------------------------------
%% Function: parse_version_info(Rest, State, Acc) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%%           Acc = [{Name, Value}]
%%           Name = string()
%%           Value = string()
%% Output:   Result = {[{Name, Value}], Rest, State}
%% Description: Parses the version attribute of the XML declaration and
%%              then continues with the rest of the declaration.
%%              [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
%%              Leading whitespace is skipped. The first name found must
%%              be "version"; anything else is a fatal error.
%%----------------------------------------------------------------------
parse_version_info(?STRING_EMPTY = Input, State, Acc) ->
    cf(Input, State, Acc, fun parse_version_info/3);
parse_version_info(?STRING_UNBOUND_REST(Char, _) = Input, State, Acc) when ?is_whitespace(Char) ->
    {_Blank, AfterWs, WsState} = whitespace(Input, State, []),
    parse_version_info(AfterWs, WsState, Acc);
parse_version_info(?STRING_UNBOUND_REST(Char, Tail), State, Acc) ->
    case is_name_start(Char) of
        false ->
            ?fatal_error(State, "expecting attribute version");
        true ->
            case parse_name(Tail, State, [Char]) of
                {"version", AfterName, NameState} ->
                    {AfterEq, EqState} = parse_eq(AfterName, NameState),
                    {Version, AfterValue, ValueState} = parse_att_value(AfterEq, EqState),
                    parse_xml_decl_rest(AfterValue, ValueState, [{"version", Version} | Acc]);
                {_OtherName, _, NameState} ->
                    ?fatal_error(NameState, "expecting attribute version")
            end
    end;
parse_version_info(Input, State, Acc) ->
    unicode_incomplete_check([Input, State, Acc, fun parse_version_info/3],
                             undefined).



%%----------------------------------------------------------------------
%% Function: parse_xml_decl_rest(Rest, State, Acc) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%%           Acc = [{Name, Value}]
%%           Name = string()
%%           Value = string()
%% Output:   Result = {[{Name, Value}], Rest, State}
%% Description: Decides how the XML declaration continues.
%%              "?>" ends the declaration and returns the collected
%%              attributes in the order they were parsed. Whitespace is
%%              skipped and parsing goes on with the encoding attribute.
%%----------------------------------------------------------------------
parse_xml_decl_rest(?STRING_EMPTY = Input, State, Acc) ->
    cf(Input, State, Acc, fun parse_xml_decl_rest/3);
parse_xml_decl_rest(?STRING("?") = Partial, State, Acc) ->
    %% Could be the first half of "?>".
    cf(Partial, State, Acc, fun parse_xml_decl_rest/3);
parse_xml_decl_rest(?STRING_REST("?>", Remaining), State, Acc) ->
    %% Acc was built by prepending, so it is reversed on the way out.
    {lists:reverse(Acc), Remaining, State};
parse_xml_decl_rest(?STRING_UNBOUND_REST(Char, _) = Input, State, Acc) when ?is_whitespace(Char) ->
    {_Blank, AfterWs, WsState} = whitespace(Input, State, []),
    parse_xml_decl_encoding(AfterWs, WsState, Acc);
parse_xml_decl_rest(Input, State, Acc) ->
    unicode_incomplete_check([Input, State, Acc, fun parse_xml_decl_rest/3],
                             "expecting encoding, standalone, whitespace or ?>").


%%----------------------------------------------------------------------
%% Function: parse_xml_decl_encoding(Rest, State, Acc) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%%           Acc = [{Name, Value}]
%%           Name = string()
%%           Value = string()
%% Output:   Result = {[{Name, Value}], Rest, State}
%% Description: Parses the encoding attribute of the XML declaration.
%%              [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
%%              [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
%%              A name starting with "e" must be exactly "encoding",
%%              otherwise a fatal error is raised. Input starting with
%%              any other character is passed on to
%%              parse_xml_decl_standalone/3.
%%----------------------------------------------------------------------
parse_xml_decl_encoding(?STRING_EMPTY = Input, State, Acc) ->
    cf(Input, State, Acc, fun parse_xml_decl_encoding/3);
parse_xml_decl_encoding(?STRING_REST("e", Tail), State, Acc) ->
    case parse_name(Tail, State, [$e]) of
        {"encoding", AfterName, NameState} ->
            {AfterEq, EqState} = parse_eq(AfterName, NameState),
            {Encoding, AfterValue, ValueState} = parse_att_value(AfterEq, EqState),
            parse_xml_decl_encoding_1(AfterValue, ValueState, [{"encoding", Encoding} | Acc]);
        {Name, _AfterName, NameState} ->
            ?fatal_error(NameState, "Attribute " ++ Name ++
                         " not allowed in xml declaration")
    end;
parse_xml_decl_encoding(?STRING_UNBOUND_REST(_Char, _) = Input, State, Acc) ->
    parse_xml_decl_standalone(Input, State, Acc);
parse_xml_decl_encoding(Input, State, Acc) ->
    unicode_incomplete_check([Input, State, Acc, fun parse_xml_decl_encoding/3],
                             undefined).


%%----------------------------------------------------------------------
%% Function: parse_xml_decl_encoding_1(Rest, State, Acc) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%%           Acc = [{Name, Value}]
%% Output:   Result = {[{Name, Value}], Rest, State}
%% Description: Continues after the encoding value has been read. If
%%              whitespace follows it is skipped and the standalone
%%              attribute is tried; otherwise the input is handed to
%%              parse_xml_decl_rest/3.
%%----------------------------------------------------------------------
parse_xml_decl_encoding_1(?STRING_UNBOUND_REST(Ch, _) = Input, State, Acc)
  when ?is_whitespace(Ch) ->
    {_Skipped, AfterWs, StateAfterWs} = whitespace(Input, State, []),
    parse_xml_decl_standalone(AfterWs, StateAfterWs, Acc);
parse_xml_decl_encoding_1(?STRING_UNBOUND_REST(_Ch, _) = Input, State, Acc) ->
    parse_xml_decl_rest(Input, State, Acc);
parse_xml_decl_encoding_1(Input, State, Acc) ->
    ResumeArgs = [Input, State, Acc, fun parse_xml_decl_encoding_1/3],
    unicode_incomplete_check(ResumeArgs, undefined).


%%----------------------------------------------------------------------
%% Function: parse_xml_decl_standalone(Rest, State, Acc) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%%           Acc = [{Name, Value}]
%%           Name = string()
%%           Value = string()
%% Output:   Result = {[{Name, Value}], Rest, State}
%% Description: Handles the optional standalone pseudo-attribute of the XML
%%              declaration. The value must be "yes" or "no"; it is stored
%%              as the atom yes | no in the standalone field of the parser
%%              state and as a string in Acc. Input not starting with "s"
%%              is passed on to parse_xml_decl_rest/3.
%%              [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") |
%%                              ('"' ('yes' | 'no') '"'))
%%----------------------------------------------------------------------
parse_xml_decl_standalone(?STRING_EMPTY, State, Acc) ->
    cf(?STRING_EMPTY, State, Acc, fun parse_xml_decl_standalone/3);
parse_xml_decl_standalone(?STRING_REST("s", Tail), State, Acc) ->
    %% The leading "s" has been consumed, so it seeds the name accumulator.
    case parse_name(Tail, State, [$s]) of
        {"standalone", AfterName, S1} ->
            {AfterEq, S2} = parse_eq(AfterName, S1),
            {Value, AfterValue, S3} = parse_att_value(AfterEq, S2),
            %% Validate before anything else is consumed; only the two
            %% legal values are ever turned into atoms.
            Flag =
                case Value of
                    "yes" -> yes;
                    "no" -> no;
                    _ ->
                        ?fatal_error(S3, "Wrong value of attribute standalone in xml declaration, must be yes or no")
                end,
            {_Skipped, AfterWs, S4} = whitespace(AfterValue, S3, []),
            parse_xml_decl_rest(AfterWs,
                                S4#xmerl_sax_parser_state{standalone = Flag},
                                [{"standalone", Value} | Acc]);
        {Other, _Unused, S1} ->
            ?fatal_error(S1, "Attribute " ++ Other ++
                             " not allowed in xml declaration")
    end;
parse_xml_decl_standalone(?STRING_UNBOUND_REST(_Ch, _) = Input, State, Acc) ->
    parse_xml_decl_rest(Input, State, Acc);
parse_xml_decl_standalone(Input, State, Acc) ->
    ResumeArgs = [Input, State, Acc, fun parse_xml_decl_standalone/3],
    unicode_incomplete_check(ResumeArgs, undefined).



%%----------------------------------------------------------------------
%% Function: parse_pi(Rest, State) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = {Rest, State}
%% Description: Parses a processing instruction whose "<?" has already been
%%              consumed and emits a processingInstruction event.
%%              A target that spells "xml" in any letter case is special:
%%              with no open elements the untouched input and state are
%%              returned, otherwise it is a fatal error.
%%              [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
%%              [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
%%----------------------------------------------------------------------
parse_pi(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_pi/2);
parse_pi(?STRING_UNBOUND_REST(First, Tail) = Input, State) ->
    case is_name_start(First) of
        false ->
            ?fatal_error(State, "expecting name");
        true ->
            {Target, AfterTarget, S1} = parse_name(Tail, State, [First]),
            case string:to_lower(Target) of
                "xml" ->
                    case State#xmerl_sax_parser_state.end_tags of
                        [] ->
                            %% Return the input as it was received,
                            %% together with the original state.
                            {Input, State};
                        _ ->
                            ?fatal_error(S1, "<?xml  ...?> not first in document")
                    end;
                _ ->
                    {Data, AfterPi, S2} = parse_pi_1(AfterTarget, S1),
                    S3 = event_callback({processingInstruction, Target, Data}, S2),
                    {AfterPi, S3}
            end
    end;
parse_pi(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_pi/2], undefined).

%%----------------------------------------------------------------------
%% Function: parse_pi_1(Rest, State) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = {PiData, Rest, State}
%%           PiData = string()
%% Description: Parses what follows the target of a processing instruction:
%%              either whitespace followed by the PI data, or an immediate
%%              "?>" (in which case PiData is the empty string).
%%----------------------------------------------------------------------
parse_pi_1(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_pi_1/2);
parse_pi_1(?STRING("?") = Bytes, State) ->
    %% A lone "?" may be the first half of "?>" split across two input
    %% chunks; wait for more input instead of failing, in the same way as
    %% parse_pi_data/3 and parse_xml_decl_rest/3 do.
    cf(Bytes, State, fun parse_pi_1/2);
parse_pi_1(?STRING_UNBOUND_REST(C,_) = Rest, State) when ?is_whitespace(C) ->
    {_WS, Rest1, State1} =
        whitespace(Rest, State, []),
    parse_pi_data(Rest1, State1, []);
parse_pi_1(?STRING_REST("?>", Rest), State) ->
    {[], Rest, State};
parse_pi_1(Bytes, State) ->
    %% The continuation must be this function: the target has already been
    %% consumed, so resuming in parse_pi/2 would re-read the remaining
    %% input as a new PI target.
    unicode_incomplete_check([Bytes, State, fun parse_pi_1/2],
                             "expecting whitespace or '?>'").


%%----------------------------------------------------------------------
%% Function: parse_name(Rest, State, Acc) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%%           Acc = string()
%% Output:   Result = {Name, Rest, State}
%%           Name = string()
%% Description: Collects name characters. Acc holds the characters read so
%%              far in reverse order; parsing stops at the first character
%%              for which is_name_char/1 is false, and that character is
%%              left in Rest.
%%              [5] Name ::= (Letter | '_' | ':') (NameChar)*
%%----------------------------------------------------------------------
parse_name(?STRING_EMPTY, State, Acc) ->
    cf(?STRING_EMPTY, State, Acc, fun parse_name/3);
parse_name(?STRING_UNBOUND_REST(Ch, Tail) = Input, State, Acc) ->
    case is_name_char(Ch) of
        false ->
            %% Ch is not consumed: the whole input is returned.
            {lists:reverse(Acc), Input, State};
        true ->
            parse_name(Tail, State, [Ch | Acc])
    end;
parse_name(Input, State, Acc) ->
    ResumeArgs = [Input, State, Acc, fun parse_name/3],
    unicode_incomplete_check(ResumeArgs, undefined).


%%----------------------------------------------------------------------
%% Function: parse_ns_name(Rest, State, Prefix, Name) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%%           Prefix = string()
%%           Name = string()
%% Output:   Result = {{Prefix, Name}, Rest, State}
%%           Name = string()
%% Description: Collects a namespace-qualified name. Works like
%%              parse_name/3, except that a colon seen while Prefix is
%%              still empty ends the prefix: the characters gathered so far
%%              become Prefix and collection restarts for the local name.
%%              Name is accumulated in reverse order and reversed on return.
%%----------------------------------------------------------------------
parse_ns_name(?STRING_EMPTY, State, Prefix, Name) ->
    cf(?STRING_EMPTY, State, Prefix, Name, fun parse_ns_name/4);
parse_ns_name(?STRING_UNBOUND_REST($:, Tail), State, [], Name) ->
    %% Only matches while no prefix has been set.
    parse_ns_name(Tail, State, lists:reverse(Name), []);
parse_ns_name(?STRING_UNBOUND_REST(Ch, Tail) = Input, State, Prefix, Name) ->
    case is_name_char(Ch) of
        false ->
            {{Prefix, lists:reverse(Name)}, Input, State};
        true ->
            parse_ns_name(Tail, State, Prefix, [Ch | Name])
    end;
parse_ns_name(Input, State, Prefix, Name) ->
    ResumeArgs = [Input, State, Prefix, Name, fun parse_ns_name/4],
    unicode_incomplete_check(ResumeArgs, undefined).


%%----------------------------------------------------------------------
%% Function: parse_pi_data(Rest, State, Acc) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%%           Acc = string()
%% Output:   Result = {PiData, Rest, State}
%%           PiData = string()
%% Description: Collects the data part of a processing instruction up to
%%              the terminating "?>". "\n", "\r\n" and "\r" are each stored
%%              as a single ?lf and bump the line counter.
%%----------------------------------------------------------------------
parse_pi_data(?STRING_EMPTY, State, Acc) ->
    cf(?STRING_EMPTY, State, Acc, fun parse_pi_data/3);
parse_pi_data(?STRING("?") = Pending, State, Acc) ->
    %% Could be the first half of "?>".
    cf(Pending, State, Acc, fun parse_pi_data/3);
parse_pi_data(?STRING("\r") = Pending, State, Acc) ->
    %% Could be the first half of "\r\n".
    cf(Pending, State, Acc, fun parse_pi_data/3);
parse_pi_data(?STRING_REST("?>", Tail), State, Acc) ->
    {lists:reverse(Acc), Tail, State};
parse_pi_data(?STRING_REST("\n", Tail),
              State = #xmerl_sax_parser_state{line_no = LineNo}, Acc) ->
    NextState = State#xmerl_sax_parser_state{line_no = LineNo + 1},
    parse_pi_data(Tail, NextState, [?lf | Acc]);
parse_pi_data(?STRING_REST("\r\n", Tail),
              State = #xmerl_sax_parser_state{line_no = LineNo}, Acc) ->
    NextState = State#xmerl_sax_parser_state{line_no = LineNo + 1},
    parse_pi_data(Tail, NextState, [?lf | Acc]);
parse_pi_data(?STRING_REST("\r", Tail),
              State = #xmerl_sax_parser_state{line_no = LineNo}, Acc) ->
    NextState = State#xmerl_sax_parser_state{line_no = LineNo + 1},
    parse_pi_data(Tail, NextState, [?lf | Acc]);
parse_pi_data(?STRING_UNBOUND_REST(Ch, Tail), State, Acc) when ?is_char(Ch) ->
    parse_pi_data(Tail, State, [Ch | Acc]);
parse_pi_data(Input, State, Acc) ->
    ResumeArgs = [Input, State, Acc, fun parse_pi_data/3],
    unicode_incomplete_check(ResumeArgs, "not an character").


%%----------------------------------------------------------------------
%% Function: parse_cdata(Rest, State) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = {Rest, State}
%% Description: Recognises the "[CDATA[" that opens a CDATA section, emits
%%              a startCDATA event and continues in parse_cdata/3. Every
%%              proper prefix of "[CDATA[" is handed to cf/3 so that the
%%              opener may be split across input chunks.
%%              [18] CDSect ::= CDStart CData CDEnd
%%              [19] CDStart ::= '<![CDATA['
%%              [20] CData ::= (Char* - (Char* ']]>' Char*))
%%              [21] CDEnd ::= ']]>'
%%----------------------------------------------------------------------
parse_cdata(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_cdata/2);
parse_cdata(?STRING("[") = Partial, State) ->
    cf(Partial, State, fun parse_cdata/2);
parse_cdata(?STRING("[C") = Partial, State) ->
    cf(Partial, State, fun parse_cdata/2);
parse_cdata(?STRING("[CD") = Partial, State) ->
    cf(Partial, State, fun parse_cdata/2);
parse_cdata(?STRING("[CDA") = Partial, State) ->
    cf(Partial, State, fun parse_cdata/2);
parse_cdata(?STRING("[CDAT") = Partial, State) ->
    cf(Partial, State, fun parse_cdata/2);
parse_cdata(?STRING("[CDATA") = Partial, State) ->
    cf(Partial, State, fun parse_cdata/2);
parse_cdata(?STRING_REST("[CDATA[", Tail), State) ->
    Started = event_callback(startCDATA, State),
    parse_cdata(Tail, Started, []);
parse_cdata(Input, State) ->
    ResumeArgs = [Input, State, fun parse_cdata/2],
    unicode_incomplete_check(ResumeArgs, "expecting comment or CDATA").


%%----------------------------------------------------------------------
%% Function: parse_cdata(Rest, State, Acc) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%%           Acc = string()
%% Output:   Result = {Rest, State}
%% Description: Collects the body of a CDATA section. At "]]>" the
%%              collected text is sent as a characters event followed by
%%              endCDATA, and parsing continues in parse_content/4.
%%              "\n", "\r\n" and "\r" are each stored as a single ?lf and
%%              bump the line counter.
%%----------------------------------------------------------------------
parse_cdata(?STRING_EMPTY, State, Acc) ->
    cf(?STRING_EMPTY, State, Acc, fun parse_cdata/3);
parse_cdata(?STRING("\r") = Pending, State, Acc) ->
    %% Could be the first half of "\r\n".
    cf(Pending, State, Acc, fun parse_cdata/3);
parse_cdata(?STRING("]") = Pending, State, Acc) ->
    %% Could be the start of "]]>".
    cf(Pending, State, Acc, fun parse_cdata/3);
parse_cdata(?STRING("]]") = Pending, State, Acc) ->
    cf(Pending, State, Acc, fun parse_cdata/3);
parse_cdata(?STRING_REST("]]>", Tail), State, Acc) ->
    AfterChars = event_callback({characters, lists:reverse(Acc)}, State),
    AfterEnd = event_callback(endCDATA, AfterChars),
    parse_content(Tail, AfterEnd, [], true);
parse_cdata(?STRING_REST("\n", Tail),
            State = #xmerl_sax_parser_state{line_no = LineNo}, Acc) ->
    NextState = State#xmerl_sax_parser_state{line_no = LineNo + 1},
    parse_cdata(Tail, NextState, [?lf | Acc]);
parse_cdata(?STRING_REST("\r\n", Tail),
            State = #xmerl_sax_parser_state{line_no = LineNo}, Acc) ->
    NextState = State#xmerl_sax_parser_state{line_no = LineNo + 1},
    parse_cdata(Tail, NextState, [?lf | Acc]);
parse_cdata(?STRING_REST("\r", Tail),
            State = #xmerl_sax_parser_state{line_no = LineNo}, Acc) ->
    NextState = State#xmerl_sax_parser_state{line_no = LineNo + 1},
    parse_cdata(Tail, NextState, [?lf | Acc]);
parse_cdata(?STRING_UNBOUND_REST(Ch, Tail), State, Acc) when ?is_char(Ch) ->
    parse_cdata(Tail, State, [Ch | Acc]);
parse_cdata(?STRING_UNBOUND_REST(Bad, _), State, _) ->
    ?fatal_error(State, "CDATA contains bad character value: " ++ [Bad]);
parse_cdata(Input, State, Acc) ->
    ResumeArgs = [Input, State, Acc, fun parse_cdata/3],
    unicode_incomplete_check(ResumeArgs, undefined).


%%----------------------------------------------------------------------
%% Function: parse_comment(Rest, State, Acc) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%%           Acc = string()
%% Output:   Result = {Rest, State}
%% Description: Parse a comment whose "<!--" has already been consumed and
%%              emit a comment event at "-->". A "--" that is not part of
%%              "-->" and any character rejected by ?is_char are fatal
%%              errors. "\n", "\r\n" and "\r" are each stored as a single
%%              ?lf and bump the line counter.
%%              [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
%%----------------------------------------------------------------------
parse_comment(?STRING_EMPTY, State, Acc) ->
    cf(?STRING_EMPTY, State, Acc, fun parse_comment/3);
parse_comment(?STRING("\r") = Bytes, State, Acc) ->
    %% Could be the first half of "\r\n".
    cf(Bytes, State, Acc, fun parse_comment/3);
parse_comment(?STRING("-") = Bytes, State, Acc) ->
    %% Could be the start of "-->" or of an illegal "--".
    cf(Bytes, State, Acc, fun parse_comment/3);
parse_comment(?STRING("--") = Bytes, State, Acc) ->
    cf(Bytes, State, Acc, fun parse_comment/3);
parse_comment(?STRING_REST("-->", Rest), State, Acc) ->
    State1 = event_callback({comment, lists:reverse(Acc)}, State),
    {Rest, State1};
parse_comment(?STRING_REST("--", _), State, _) ->
    %% Must come after the "-->" clause.
    ?fatal_error(State, "comment contains '--'");
parse_comment(?STRING_REST("\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Acc) ->
    parse_comment(Rest, State#xmerl_sax_parser_state{line_no=N+1}, [?lf|Acc]);
parse_comment(?STRING_REST("\r\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Acc) ->
    parse_comment(Rest, State#xmerl_sax_parser_state{line_no=N+1}, [?lf|Acc]);
parse_comment(?STRING_REST("\r", Rest), #xmerl_sax_parser_state{line_no=N} = State, Acc) ->
    parse_comment(Rest, State#xmerl_sax_parser_state{line_no=N+1}, [?lf|Acc]);
parse_comment(?STRING_UNBOUND_REST(C, Rest), State, Acc) when ?is_char(C) ->
    parse_comment(Rest, State, [C|Acc]);
parse_comment(?STRING_UNBOUND_REST(C, _), State, _) ->
    %% C is a single code point, so it has to be wrapped in a list before
    %% it is appended; "..." ++ C would build an improper list instead of
    %% an error string.
    ?fatal_error(State, "Bad character in comment: " ++ [C]);
parse_comment(Bytes, State, Acc) ->
    unicode_incomplete_check([Bytes, State, Acc, fun parse_comment/3],
                             undefined).


%%----------------------------------------------------------------------
%% Function: parse_misc(Rest, State, Eod) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%%           Eod = true | false
%% Output:   Result = {Rest, State}
%% Description: Consumes any run of comments, processing instructions and
%%              whitespace. With empty input and Eod = true the current
%%              state is returned right away; with any other Eod more
%%              input is requested through cf/4. Input that is none of the
%%              three Misc alternatives is returned untouched.
%%              [27] Misc ::= Comment | PI |  S
%%----------------------------------------------------------------------
parse_misc(?STRING_EMPTY, State, true) ->
    {?STRING_EMPTY, State};
parse_misc(?STRING_EMPTY, State, Eod) ->
    cf(?STRING_EMPTY, State, Eod, fun parse_misc/3);
parse_misc(?STRING("<") = Partial, State, Eod) ->
    cf(Partial, State, Eod, fun parse_misc/3);
parse_misc(?STRING_REST("<?", Tail), State, Eod) ->
    {AfterPi, StateAfterPi} = parse_pi(Tail, State),
    parse_misc(AfterPi, StateAfterPi, Eod);
parse_misc(?STRING("<!") = Partial, State, Eod) ->
    cf(Partial, State, Eod, fun parse_misc/3);
parse_misc(?STRING("<!-") = Partial, State, Eod) ->
    cf(Partial, State, Eod, fun parse_misc/3);
parse_misc(?STRING_REST("<!--", Tail), State, Eod) ->
    {AfterComment, StateAfterComment} = parse_comment(Tail, State, []),
    parse_misc(AfterComment, StateAfterComment, Eod);
parse_misc(?STRING_UNBOUND_REST(Ch, _) = Input, State, Eod)
  when ?is_whitespace(Ch) ->
    {_Skipped, AfterWs, StateAfterWs} = whitespace(Input, State, []),
    parse_misc(AfterWs, StateAfterWs, Eod);
parse_misc(Other, State, _Eod) ->
    %% Unlike the sibling parse functions this clause does not go through
    %% unicode_incomplete_check/2 (an earlier variant reported
    %% "expecting comment or PI"); whatever is left is returned to the
    %% caller.
    {Other, State}.

%%----------------------------------------------------------------------
%% Function: parse_stag(Rest, State) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = {Rest, State}
%% Description: Parses the name of a start tag and continues with its
%%              attribute list in parse_attributes/3, starting from empty
%%              attribute and namespace lists. The first character must
%%              satisfy is_name_start/1.
%%              [40] STag ::= '<' Name (S Attribute)* S? '>'
%%----------------------------------------------------------------------
parse_stag(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_stag/2);
parse_stag(?STRING_UNBOUND_REST(First, Tail), State) ->
    case is_name_start(First) of
        false ->
            ?fatal_error(State, "expecting name");
        true ->
            {QualifiedTag, AfterName, StateAfterName} =
                parse_ns_name(Tail, State, [], [First]),
            parse_attributes(AfterName, StateAfterName, {QualifiedTag, [], []})
    end;
parse_stag(Input, State) ->
    ResumeArgs = [Input, State, fun parse_stag/2],
    unicode_incomplete_check(ResumeArgs, undefined).

%%----------------------------------------------------------------------
%% Function: parse_attributes(Rest, State, CurrentTag) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%%           CurrentTag = {Name, AttList, NewNsList}
%%           Name = {Prefix, LocalName} as returned by parse_ns_name/4
%%           AttList = [{{Prefix, LocalName}, Value}]
%%           NewNsList = [{Prefix, Uri}]
%% Output:   Result = {Rest, State}
%% Description: Parsing the attribute list in the start tag. The current 
%%              tag tuple contains the tag name, a list of attributes 
%%              (exclusive NS attributes) and a list of new NS attributes.
%%              Both lists are built by prepending, so they are in reverse
%%              document order while parsing. The function ends at "/>"
%%              or ">", where the start (and, for "/>", end) events are
%%              emitted and parsing continues in parse_content/4.
%%              [41] Attribute ::= Name Eq AttValue
%%----------------------------------------------------------------------
parse_attributes(?STRING_EMPTY, State, CurrentTag) ->
    cf(?STRING_EMPTY, State, CurrentTag, fun parse_attributes/3);
%% A lone "/" may be the first half of "/>".
parse_attributes(?STRING("/") = Bytes, State, CurrentTag) -> 
    cf(Bytes, State, CurrentTag, fun parse_attributes/3);
%% Empty-element tag: startElement is immediately followed by endElement,
%% wrapped in the prefix-mapping events for the namespaces declared on this
%% tag. Neither end_tags nor ns in the state is changed.
parse_attributes(?STRING_REST("/>", Rest), State, {Tag, AttList, NewNsList}) ->
    CompleteNsList =  NewNsList ++ State#xmerl_sax_parser_state.ns,
    {Uri, LocalName, QName, Attributes} = fix_ns(Tag, AttList, CompleteNsList),
    State1 =  send_start_prefix_mapping_event(lists:reverse(NewNsList), State),
    State2 =  event_callback({startElement, Uri, LocalName, QName, Attributes}, State1),
    State3 =  event_callback({endElement, Uri, LocalName, QName}, State2),
    State4 =  send_end_prefix_mapping_event(NewNsList, State3),
    parse_content(Rest, State4, [], true);
%% Ordinary start tag: after startElement the tag is pushed on end_tags
%% together with the previous and the newly declared namespace lists, and
%% ns is replaced by the combined list for the element content.
parse_attributes(?STRING_REST(">", Rest), #xmerl_sax_parser_state{end_tags=ETags, ns = OldNsList} = State, 
		 {Tag, AttList, NewNsList}) ->
    %% New declarations are placed in front of the inherited ones.
    CompleteNsList =  NewNsList ++ OldNsList,
    {Uri, LocalName, QName, Attributes} = fix_ns(Tag, AttList, CompleteNsList),
    State1 =  send_start_prefix_mapping_event(lists:reverse(NewNsList), State),
    State2 =  event_callback({startElement, Uri, LocalName, QName, Attributes}, State1),
    parse_content(Rest, State2#xmerl_sax_parser_state{end_tags=[{Tag, Uri, LocalName, QName, 
							  OldNsList, NewNsList} |ETags],
					       ns = CompleteNsList}, 
		  [], true);
parse_attributes(?STRING_UNBOUND_REST(C, _) = Rest, State, CurrentTag) when ?is_whitespace(C) ->
    {_WS, Rest1, State1} = whitespace(Rest, State, []),
    parse_attributes(Rest1, State1, CurrentTag);
%% One attribute: Name Eq AttValue.
parse_attributes(?STRING_UNBOUND_REST(C, Rest), State, {Tag, AttList, NsList}) -> 
    case is_name_start(C) of
	true ->
	    {AttrName, Rest1, State1} = 
		parse_ns_name(Rest, State, [], [C]),
	    {Rest2, State2} = parse_eq(Rest1, State1),
	    {AttValue, Rest3, State3} = parse_att_value(Rest2, State2),
	    case AttrName of
		%% xmlns:NsName="..." declares a namespace prefix; it goes
		%% to the namespace list, not to the attribute list.
		{"xmlns", NsName} ->
		    parse_attributes(Rest3, State3, {Tag, AttList, [{NsName, AttValue} |NsList]});
		%% xmlns="..." is stored under the empty prefix.
		{"", "xmlns"} ->
		    parse_attributes(Rest3, State3, {Tag, AttList, [{"", AttValue} |NsList]});
		{_Prefix, _LocalName} ->
		    %% An attribute with the same {Prefix, LocalName} may
		    %% only occur once per element.
		    case lists:keyfind(AttrName, 1, AttList) of
			false ->
			    parse_attributes(Rest3, State3, {Tag, [{AttrName, AttValue}|AttList], NsList});
			_ ->
			    %% Element name for the error message; the
			    %% prefix is left out when it is empty.
			    ElName =
				case Tag of
				    {"", N} -> N;
				    {Ns, N} -> Ns ++ ":" ++ N
				end,
			    %% Note: the error is raised with State, i.e.
			    %% the state from before this attribute was read.
			    ?fatal_error(State,  "Attribute exist more than once in element: " ++ ElName)
		    end
	    end;
	false ->
	    ?fatal_error(State,  "Invalid start character in attribute name: " ++ [C])
    end;
parse_attributes(Bytes, State, CurrentTag) ->
    unicode_incomplete_check([Bytes, State, CurrentTag, fun parse_attributes/3],
			      "expecting name, whitespace, /> or >").



%%----------------------------------------------------------------------
%% Function: fix_ns({Prefix, Name}, Attributes, Ns) -> Result
%% Input:    Prefix = string()
%%           Name = string()
%%           Attributes = [{{Prefix, Name}, Value}]
%%           Ns = [{Prefix, Uri}]
%%           Uri = string()
%% Output:   Result = {Uri, Name, QualifiedName, Attributes}
%%           QualifiedName = {Prefix, Name}
%% Description: Resolves the namespace of a start tag and of its
%%              attributes. Uri is the one bound to Prefix by the first
%%              matching entry in Ns, or "" when Prefix is not bound.
%%              The attributes are resolved by fix_attributes_ns/3.
%%----------------------------------------------------------------------
fix_ns({Prefix, Name}, Attributes, Ns) ->
    TagUri =
        case lists:keyfind(Prefix, 1, Ns) of
            {Prefix, Bound} -> Bound;
            false -> ""
        end,
    ResolvedAttributes = fix_attributes_ns(Attributes, Ns, []),
    {TagUri, Name, {Prefix, Name}, ResolvedAttributes}.

%%----------------------------------------------------------------------
%% Function: fix_attributes_ns(Attributes, Ns, Acc) -> Result
%% Input:    Attributes = [{{Prefix, Name}, Value}]
%%           Prefix = string()
%%           Name = string()
%%           Value = string()
%%           Ns = [{Prefix, Uri}]
%%           Uri = string()
%%           Acc = [{Uri, Prefix, Name, Value}]
%% Output:   Result = [{Uri, Prefix, Name, Value}]
%% Description: Resolves the namespace URI of every attribute. An
%%              attribute without prefix always gets the empty URI (the
%%              namespace list is not consulted); a prefixed one gets the
%%              URI of the first matching entry in Ns, or "" when the
%%              prefix is not bound. Each result is prepended to Acc, so
%%              the output is in reverse order of Attributes.
%%----------------------------------------------------------------------
fix_attributes_ns(Attributes, Ns, Acc) ->
    Resolve =
        fun({{"", Name}, Value}, Out) ->
                [{"", "", Name, Value} | Out];
           ({{Prefix, Name}, Value}, Out) ->
                Uri =
                    case lists:keyfind(Prefix, 1, Ns) of
                        {Prefix, Bound} -> Bound;
                        false -> ""
                    end,
                [{Uri, Prefix, Name, Value} | Out]
        end,
    lists:foldl(Resolve, Acc, Attributes).
    

%%----------------------------------------------------------------------
%% Function: send_start_prefix_mapping_event(Ns, State) -> Result
%% Input:    Ns = [{Prefix, Uri}]
%%           Prefix = string()
%%           Uri = string()
%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = #xmerl_sax_parser_state{}
%% Description: Sends one startPrefixMapping event per entry in Ns, in
%%              list order, threading the state returned by each callback
%%              into the next one.
%%----------------------------------------------------------------------
send_start_prefix_mapping_event(Ns, State) ->
    Emit = fun({Prefix, Uri}, Current) ->
                   event_callback({startPrefixMapping, Prefix, Uri}, Current)
           end,
    lists:foldl(Emit, State, Ns).

 
%%----------------------------------------------------------------------
%% Function: send_end_prefix_mapping_event(Ns, State) -> Result
%% Input:    Ns = [{Prefix, Uri}]
%%           Prefix = string()
%%           Uri = string()
%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = #xmerl_sax_parser_state{}
%% Description: Sends one endPrefixMapping event per entry in Ns, in list
%%              order, threading the state returned by each callback into
%%              the next one. Only the prefix is reported.
%%----------------------------------------------------------------------
send_end_prefix_mapping_event(Ns, State) ->
    Emit = fun({Prefix, _Uri}, Current) ->
                   event_callback({endPrefixMapping, Prefix}, Current)
           end,
    lists:foldl(Emit, State, Ns).

   
%%----------------------------------------------------------------------
%% Function: parse_eq(Rest, State) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = {Rest, State}
%% Description: Consumes optional leading whitespace and the '=' sign.
%%              Whitespace after the '=' is not consumed here.
%%              [25] Eq ::= S? '=' S?
%%----------------------------------------------------------------------
parse_eq(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_eq/2);
parse_eq(?STRING_REST("=", Tail), State) ->
    {Tail, State};
parse_eq(?STRING_UNBOUND_REST(Ch, _) = Input, State) when ?is_whitespace(Ch) ->
    {_Skipped, AfterWs, StateAfterWs} = whitespace(Input, State, []),
    parse_eq(AfterWs, StateAfterWs);
parse_eq(Input, State) ->
    ResumeArgs = [Input, State, fun parse_eq/2],
    unicode_incomplete_check(ResumeArgs, "expecting = or whitespace").


%%----------------------------------------------------------------------
%% Function: parse_att_value(Rest, State) -> Result
%% Input:    Rest = string() | binary()
%%           State = #xmerl_sax_parser_state{}
%% Output:   Result = {Value, Rest, State}
%%           Value = string()
%% Description: Skips whitespace in front of an attribute value, reads the
%%              opening quote and parses the value with parse_att_value/4,
%%              using that quote character as the stop character.
%%              [10] AttValue ::= '"' ([^<&"] | Reference)* '"'
%%                             |  "'" ([^<&'] | Reference)* "'"
%%----------------------------------------------------------------------
parse_att_value(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_att_value/2);
parse_att_value(?STRING_UNBOUND_REST(Quote, Tail), State)
  when Quote == $'; Quote == $" ->
    parse_att_value(Tail, State, Quote, []);
parse_att_value(?STRING_UNBOUND_REST(Ch, _) = Input, State)
  when ?is_whitespace(Ch) ->
    {_Skipped, AfterWs, StateAfterWs} = whitespace(Input, State, []),
    parse_att_value(AfterWs, StateAfterWs);
parse_att_value(Input, State) ->
    ResumeArgs = [Input, State, fun parse_att_value/2],
    unicode_incomplete_check(ResumeArgs, "\', \" or whitespace expected").


%%----------------------------------------------------------------------
%% Function  : parse_att_value(Rest, State, Stop, Acc) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             Stop = $' | $" | undefined
%%             Acc = string()
%% Result    : {Value, Rest, State}
%%             Value = string()
%% Description: Parse an attribute value up to the closing quote Stop.
%%              Line breaks ("\n", "\r\n", "\r") and tabs are each replaced
%%              by a single ?space; only line breaks advance the line
%%              counter. References introduced by "&" are resolved through
%%              parse_reference/3.
%%              Stop = undefined is used when the replacement text of an
%%              entity is parsed recursively: the whole input is consumed
%%              and the accumulator is returned as it is, i.e. still in
%%              reverse order, together with [] as the rest.
%%----------------------------------------------------------------------
parse_att_value(?STRING_EMPTY, State, undefined, Acc) ->
    {Acc, [], State}; %% stop clause when parsing references
parse_att_value(?STRING_EMPTY, State, Stop, Acc) ->
    cf(?STRING_EMPTY, State, Stop, Acc, fun parse_att_value/4);
parse_att_value(?STRING("\r") = Bytes, State, Stop, Acc) ->
    %% Could be the first half of "\r\n".
    cf(Bytes, State, Stop, Acc, fun parse_att_value/4);
parse_att_value(?STRING_REST("\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Stop, Acc) ->
    parse_att_value(Rest,
                    State#xmerl_sax_parser_state{line_no=N+1}, Stop, [?space |Acc]);
parse_att_value(?STRING_REST("\r\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Stop, Acc) ->
    parse_att_value(Rest,
                    State#xmerl_sax_parser_state{line_no=N+1}, Stop, [?space |Acc]);
parse_att_value(?STRING_REST("\r", Rest), #xmerl_sax_parser_state{line_no=N} = State, Stop, Acc) ->
    parse_att_value(Rest,
                    State#xmerl_sax_parser_state{line_no=N+1}, Stop, [?space |Acc]);
parse_att_value(?STRING_REST("\t", Rest), State, Stop, Acc) ->
    %% A tab is normalised to a space like a line break, but it does not
    %% start a new line, so line_no must stay as it is.
    parse_att_value(Rest, State, Stop, [?space |Acc]);
parse_att_value(?STRING_REST("&", Rest), State, Stop, Acc) ->
    {Ref, Rest1, State1} = parse_reference(Rest, State, true),
    case Ref of
        {character, _, CharValue} ->
            parse_att_value(Rest1, State1, Stop, [CharValue | Acc]);
        {internal_general, true, _, Value} ->
            %% Value is put in front of the reversed accumulator unchanged.
            parse_att_value(Rest1, State1, Stop, Value ++ Acc);
        {internal_general, false, _, Value} ->
            %% Parse the replacement text itself as attribute content; the
            %% undefined stop character makes the call return its
            %% accumulator unreversed, ready to be put in front of Acc.
            {ParsedValue, [], State2} = parse_att_value(?TO_INPUT_FORMAT(Value), State1, undefined, []),
            parse_att_value(Rest1, State2, Stop, ParsedValue ++ Acc);
        {external_general, Name, _} ->
            ?fatal_error(State1, "External parsed entity reference in attribute value: " ++ Name);
        {not_found, Name} ->
            case State#xmerl_sax_parser_state.skip_external_dtd of
                false ->
                    ?fatal_error(State1, "Entity not declared: " ++ Name); %%VC: Entity Declared
                true ->
                    %% Keep the reference as literal text "&Name;" (built
                    %% reversed, since Acc is in reverse order).
                    parse_att_value(Rest1, State1, Stop, ";" ++ lists:reverse(Name) ++ "&" ++ Acc)
            end;
        {unparsed, Name, _} ->
            ?fatal_error(State1, "Unparsed entity reference in  attribute value: " ++ Name)
    end;
parse_att_value(?STRING_UNBOUND_REST(Stop, Rest), State, Stop, Acc) ->
    %% Closing quote: same character as the opening one.
    {lists:reverse(Acc), Rest, State};
parse_att_value(?STRING_UNBOUND_REST($<, _Rest), State, _Stop, _Acc) ->
    ?fatal_error(State,  "< not allowed in attribute value");
parse_att_value(?STRING_UNBOUND_REST(C, Rest), State, Stop, Acc) ->
    if
        ?is_char(C) ->
            parse_att_value(Rest, State, Stop, [C|Acc]);
        true ->
            ?fatal_error(State, lists:flatten(io_lib:format("Bad character in attribute value: ~p", [C])))
    end;
parse_att_value(Bytes, State, Stop, Acc) ->
    unicode_incomplete_check([Bytes, State, Stop, Acc, fun parse_att_value/4],
                             undefined).


%%----------------------------------------------------------------------
%% Function  : parse_etag(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Rest, State}
%% Description: Parse the end tag. The name is read with parse_ns_name/4
%%              and compared with the tag on top of end_tags. A mismatch
%%              is a fatal error when match_end_tags is true; when it is
%%              false the mismatch is accepted and parsing goes on as if
%%              the names were equal. With no open element (end_tags is
%%              empty) the input is returned untouched.
%%              [42] ETag ::= '</' Name S? '>'
%%----------------------------------------------------------------------
parse_etag(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_etag/2);
%% At least one element is open; ETag is the name of the innermost one.
parse_etag(?STRING_UNBOUND_REST(C, Rest), 
	   #xmerl_sax_parser_state{end_tags=[{ETag, _Uri, _LocalName, _QName, _OldNsList, _NewNsList}
				      |_RestOfETags]} = State) ->
    case is_name_start(C) of
	true ->
	    {Tag, Rest1, State1} = parse_ns_name(Rest, State, [], [C]),
	    case Tag == ETag of 
		true ->
		    %% Optional whitespace before the closing ">".
		    {_WS, Rest2, State2} = whitespace(Rest1, State1, []),
		    parse_etag_1(Rest2, State2, Tag);
		false ->
		    case State1#xmerl_sax_parser_state.match_end_tags of
			true ->
			    {P,TN} = Tag,
			    ?fatal_error(State1, "EndTag: " ++ P ++ ":" ++ TN ++ 
					 ", does not match StartTag");
			false ->
			    %% Mismatch tolerated: continue exactly as in
			    %% the matching case.
			    {_WS, Rest2, State2} = whitespace(Rest1, State1, []),
			    parse_etag_1(Rest2, State2, Tag)
		    end
	    end;
	false ->
	    ?fatal_error(State, "Name expected")
    end;
%% No element is open: nothing is consumed.
parse_etag(?STRING_UNBOUND_REST(_C, _) = Rest, #xmerl_sax_parser_state{end_tags=[]}= State) ->
    {Rest, State};
parse_etag(Bytes, State) ->
    unicode_incomplete_check([Bytes, State, fun parse_etag/2], 
			     undefined).


%%----------------------------------------------------------------------
%% Function  : parse_etag_1(Rest, State, Tag) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             Tag = {Prefix, Name}
%% Result    : {Rest, State}
%% Description: Finishes an end tag. At ">" the endElement event and the
%%              endPrefixMapping events are sent for the element on top of
%%              end_tags, that element is popped, ns is reset to the list
%%              saved with it, and parsing continues in parse_content/4.
%%              Any other character is a fatal error. Tag is only used in
%%              the error message.
%%----------------------------------------------------------------------
parse_etag_1(?STRING_REST(">", Tail),
             State = #xmerl_sax_parser_state{
                        end_tags = [{_Open, Uri, LocalName, QName, SavedNs, DeclaredNs}
                                    | OuterTags]},
             _Tag) ->
    AfterEnd = event_callback({endElement, Uri, LocalName, QName}, State),
    AfterUnmap = send_end_prefix_mapping_event(DeclaredNs, AfterEnd),
    Popped = AfterUnmap#xmerl_sax_parser_state{end_tags = OuterTags,
                                               ns = SavedNs},
    parse_content(Tail, Popped, [], true);
parse_etag_1(?STRING_UNBOUND_REST(_Ch, _), State, Tag) ->
    {Prefix, Local} = Tag,
    ?fatal_error(State, "Bad EndTag: " ++ Prefix ++ ":" ++ Local);
parse_etag_1(Input, State, Tag) ->
    ResumeArgs = [Input, State, Tag, fun parse_etag_1/3],
    unicode_incomplete_check(ResumeArgs, undefined).
    
%%----------------------------------------------------------------------
%% Function: parse_content(Rest, State, Acc, IgnorableWS) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             Acc = string()  (character data collected so far, in
%%                              reverse order)
%%             IgnorableWS = true | false  (true while Acc holds nothing
%%                              but whitespace)
%% Result    : {Rest, State}
%% Description: Parsing the content part of tags
%%              [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
%%
%%              Clauses that match a whole buffer such as "<" or "<!" are
%%              prefixes that cannot be classified yet; they call cf/5 to
%%              fetch more input and must carry Acc along so that pending
%%              character data is not lost at a chunk boundary.
%%----------------------------------------------------------------------

parse_content(?STRING_EMPTY, State, Acc, IgnorableWS) ->
    %% The old-style catch is kept on purpose: both normal returns and
    %% thrown fatal errors of the continuation are inspected below.
    case catch cf(?STRING_EMPTY, State, Acc, IgnorableWS, fun parse_content/4) of
        {Rest, State1} when is_record(State1, xmerl_sax_parser_state) ->
            {Rest, State1};
        {fatal_error, {State1, Msg}} ->
            %% Running out of input is acceptable when no element is open;
            %% flush the pending characters and finish.
            case check_if_document_complete(State1, Msg) of
                true ->
                    State2 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State1),
                    {?STRING_EMPTY, State2};
                false ->
                    ?fatal_error(State1, Msg)
            end;
        Other ->
            throw(Other)
    end;
parse_content(?STRING("\r") = Bytes, State, Acc, IgnorableWS) ->
    cf(Bytes, State, Acc, IgnorableWS, fun parse_content/4);
parse_content(?STRING("<") = Bytes, State, Acc, IgnorableWS) ->
    cf(Bytes, State, Acc, IgnorableWS, fun parse_content/4);
parse_content(?STRING_REST("</", Rest), State, Acc, IgnorableWS) ->
    State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
    parse_etag(Rest, State1);
%% "<!" and "<!-" may still become a comment or a CDATA section. Acc is
%% passed on unchanged (it used to be replaced by []), otherwise the
%% character data read before the markup would never be reported.
parse_content(?STRING("<!") = Bytes, State, Acc, IgnorableWS) ->
    cf(Bytes, State, Acc, IgnorableWS, fun parse_content/4);
parse_content(?STRING("<!-") = Bytes, State, Acc, IgnorableWS) ->
    cf(Bytes, State, Acc, IgnorableWS, fun parse_content/4);
parse_content(?STRING_REST("<!--", Rest), State, Acc, IgnorableWS) ->
    State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
    {Rest1, State2} = parse_comment(Rest, State1, []),
    parse_content(Rest1, State2, [], true);
parse_content(?STRING_REST("<?", Rest), State, Acc, IgnorableWS) ->
    State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
    {Rest1, State2} = parse_pi(Rest, State1),
    parse_content(Rest1, State2, [], true);
parse_content(?STRING_REST("<!", Rest1) = Rest, #xmerl_sax_parser_state{end_tags = ET} = State, Acc, IgnorableWS) ->
    case ET of
        [] ->
            %% No open element: leave the markup to the caller.
            {Rest, State}; %% LATH: send ignorable WS ???
        _ ->
            State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
            parse_cdata(Rest1, State1)
    end;
parse_content(?STRING_REST("<", Rest1) = Rest, #xmerl_sax_parser_state{end_tags = ET} = State, Acc, IgnorableWS) ->
    case ET of
        [] ->
            %% No open element: leave the markup to the caller.
            {Rest, State}; %% LATH: send ignorable WS ???
        _ ->
            State1 = send_character_event(length(Acc), IgnorableWS, lists:reverse(Acc), State),
            parse_stag(Rest1, State1)
    end;
%% All three line-break forms are accumulated as a single ?lf and bump
%% the line counter.
parse_content(?STRING_REST("\n", Rest), State, Acc, IgnorableWS) ->
    N = State#xmerl_sax_parser_state.line_no,
    parse_content(Rest, State#xmerl_sax_parser_state{line_no=N+1},[?lf |Acc], IgnorableWS);
parse_content(?STRING_REST("\r\n", Rest), State, Acc, IgnorableWS) ->
    N = State#xmerl_sax_parser_state.line_no,
    parse_content(Rest, State#xmerl_sax_parser_state{line_no=N+1},[?lf |Acc], IgnorableWS);
parse_content(?STRING_REST("\r", Rest), State, Acc, IgnorableWS) ->
    N = State#xmerl_sax_parser_state.line_no,
    parse_content(Rest, State#xmerl_sax_parser_state{line_no=N+1},[?lf |Acc], IgnorableWS);
parse_content(?STRING_REST(" ", Rest), State, Acc, IgnorableWS) ->
    parse_content(Rest, State,[?space |Acc], IgnorableWS);
parse_content(?STRING_REST("\t", Rest), State, Acc, IgnorableWS) ->
    parse_content(Rest, State,[?tab |Acc], IgnorableWS);
parse_content(?STRING_REST("]]>", _Rest), State, _Acc, _IgnorableWS) ->
    ?fatal_error(State, "\"]]>\" is not allowed in content");
%% Non-whitespace data while no element is open is returned to the caller.
parse_content(?STRING_UNBOUND_REST(_C, _) = Rest,
              #xmerl_sax_parser_state{end_tags = []} = State,
              _Acc, _IgnorableWS) ->
    {Rest, State};
parse_content(?STRING_REST("&", Rest), State, Acc, _IgnorableWS) ->
    {Ref, Rest1, State1} = parse_reference(Rest, State, true),
    case Ref of
        {character, _, CharValue}  ->
            parse_content(Rest1, State1, [CharValue | Acc], false);
        {internal_general, true, _, Value} ->
            %% Delimiter replacement text is taken as data, not re-parsed.
            parse_content(Rest1, State1, Value ++ Acc, false);
        {internal_general, false, _, Value} ->
            %% Other replacement text is pushed back in front of the input
            %% and parsed as content.
            IValue = ?TO_INPUT_FORMAT(Value),
            parse_content(?APPEND_STRING(IValue, Rest1), State1, Acc, false);
        {external_general, _, {PubId, SysId}} ->
            State2 = parse_external_entity(State1, PubId, SysId),
            parse_content(Rest1, State2, Acc, false);
        {not_found, Name} ->
            case State#xmerl_sax_parser_state.skip_external_dtd of
                false ->
                    ?fatal_error(State1, "Entity not declared: " ++ Name); %%VC: Entity Declared
                true ->
                    %% Keep the unresolved reference text "&Name;" as data
                    %% (Acc is reversed, hence the reversed pieces).
                    parse_content(Rest1, State1, ";" ++ lists:reverse(Name) ++ "&" ++ Acc, false)
            end;
        {unparsed, Name, _}  ->
            ?fatal_error(State1, "Unparsed entity reference in content: " ++ Name)
    end;
parse_content(?STRING_UNBOUND_REST(C, Rest), State, Acc, _IgnorableWS) ->
    if
        ?is_char(C) ->
            parse_content(Rest, State, [C|Acc], false);
        true ->
            ?fatal_error(State, lists:flatten(io_lib:format("Bad character in content: ~p", [C])))
    end;
parse_content(Bytes, State, Acc, IgnorableWS)   ->
    unicode_incomplete_check([Bytes, State, Acc, IgnorableWS, fun parse_content/4],
                             undefined).


%%----------------------------------------------------------------------
%% Function: check_if_document_complete(State, ErrorMsg) -> Result
%% Parameters: State = #xmerl_sax_parser_state{}
%%             ErrorMsg = string()
%% Result    : boolean()
%% Description: Decides whether running out of input is acceptable.
%%              Returns true only when no end tag is pending
%%              (end_tags = []) and ErrorMsg is one of the two
%%              "no more input" messages; false in every other case.
%%----------------------------------------------------------------------
check_if_document_complete(#xmerl_sax_parser_state{end_tags = []}, ErrorMsg)
  when ErrorMsg =:= "No more bytes";
       ErrorMsg =:= "Continuation function undefined" ->
    true;
check_if_document_complete(_State, _ErrorMsg) ->
    false.

%%----------------------------------------------------------------------
%% Function: send_character_event(Length, IgnorableWS, String, State) -> Result
%% Parameters: Length = integer()
%%             IgnorableWS = true | false
%%             String = string()
%%             State = #xmerl_sax_parser_state{}
%% Result    : #xmerl_sax_parser_state{}
%% Description: Reports collected character data through event_callback/2.
%%              Nothing is sent when Length is 0. Otherwise the event is
%%              {ignorableWhitespace, String} when IgnorableWS is true and
%%              {characters, String} when it is false.
%%----------------------------------------------------------------------
send_character_event(0, _IgnorableWS, _Chars, State) ->
    State;
send_character_event(_Length, IgnorableWS, Chars, State) when is_boolean(IgnorableWS) ->
    EventTag =
        case IgnorableWS of
            true -> ignorableWhitespace;
            false -> characters
        end,
    event_callback({EventTag, Chars}, State).


%%----------------------------------------------------------------------
%% Function: whitespace(Rest, State, Acc) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             Acc = string()  (whitespace read so far, reversed)
%% Result    : {WhiteSpace, Rest, State}
%%             WhiteSpace = string()
%% Description: Consumes leading whitespace and returns it together with
%%              the remaining input. Every line break ("\n", "\r\n" or
%%              "\r") is stored as ?lf and increments line_no.
%%              [3] S ::= (#x20 | #x9 | #xD | #xA)+
%%----------------------------------------------------------------------
whitespace(?STRING_EMPTY, State, Acc) ->
    %% Ask for more input; when cf/4 hands back an empty buffer with the
    %% very same state, finish with what has been collected.
    case cf(?STRING_EMPTY, State, Acc, fun whitespace/3) of
        {?STRING_EMPTY, State} ->
            {lists:reverse(Acc), ?STRING_EMPTY, State};
        Continued ->
            Continued
    end;
whitespace(?STRING("\r") = Bytes, State, Acc) ->
    %% A lone "\r" may be the first half of "\r\n"; if cf/4 returns the
    %% same buffer and state, the "\r" is left unconsumed.
    case cf(Bytes, State, Acc, fun whitespace/3) of
        {Bytes, State} ->
            {lists:reverse(Acc), Bytes, State};
        Continued ->
            Continued
    end;
whitespace(?STRING_REST("\n", Tail), State, Acc) ->
    NextLine = State#xmerl_sax_parser_state.line_no + 1,
    whitespace(Tail, State#xmerl_sax_parser_state{line_no = NextLine}, [?lf | Acc]);
whitespace(?STRING_REST("\r\n", Tail), State, Acc) ->
    NextLine = State#xmerl_sax_parser_state.line_no + 1,
    whitespace(Tail, State#xmerl_sax_parser_state{line_no = NextLine}, [?lf | Acc]);
whitespace(?STRING_REST("\r", Tail), State, Acc) ->
    NextLine = State#xmerl_sax_parser_state.line_no + 1,
    whitespace(Tail, State#xmerl_sax_parser_state{line_no = NextLine}, [?lf | Acc]);
whitespace(?STRING_UNBOUND_REST(C, Tail), State, Acc) when ?is_whitespace(C) ->
    whitespace(Tail, State, [C | Acc]);
%% First non-whitespace character: stop without consuming it.
whitespace(?STRING_UNBOUND_REST(_C, _) = Bytes, State, Acc) ->
    {lists:reverse(Acc), Bytes, State};
%% A binary that no clause above could decode is either a truncated
%% character (fetch more input) or invalid in the current encoding.
whitespace(Bytes, #xmerl_sax_parser_state{encoding = Enc} = State, Acc) when is_binary(Bytes) ->
    case unicode:characters_to_list(Bytes, Enc) of
        {incomplete, _, _} ->
            cf(Bytes, State, Acc, fun whitespace/3);
        {error, _Encoded, _Rest} ->
            ?fatal_error(State, lists:flatten(io_lib:format("Bad character, not in ~p\n", [Enc])))
    end.


%%----------------------------------------------------------------------
%% Function: parse_reference(Rest, State, HaveToExist) -> Result
%% Parameters: Rest = string() | binary()  (input after the "&")
%%             State = #xmerl_sax_parser_state{}
%%             HaveToExist = true | false  (passed on to
%%                           look_up_reference/3 for entity references)
%% Result    : {Value, Rest, State}
%%             Value = {character, IsDelimiter, Char}
%%                   | {internal_general, IsDelimiter, Name, RefValue}
%%                   | any other result of look_up_reference/3
%% Description: Parse entity references.
%%              [66] CharRef ::= '&#' [0-9]+ ';'
%%              	       | '&#x' [0-9a-fA-F]+ ';'
%%              [67] Reference ::= EntityRef | CharRef
%%              [68] EntityRef ::= '&' Name ';'
%%----------------------------------------------------------------------
parse_reference(?STRING_EMPTY, State, HaveToExist) ->
    cf(?STRING_EMPTY, State, HaveToExist, fun parse_reference/3);
%% A lone "#" cannot yet be told apart from "#x": fetch more input.
parse_reference(?STRING("#") = Bytes, State, HaveToExist) ->
    cf(Bytes, State, HaveToExist, fun parse_reference/3);
parse_reference(?STRING_REST("#x", Rest), State, _HaveToExist) ->
    {CharValue, RefString, Rest1, State1} = parse_hex(Rest, State, []),
    if
        ?is_char(CharValue) ->
            {{character, is_delimiter(CharValue), CharValue},
             Rest1, State1};
        true ->
            ?fatal_error(State1, "Not a legal character: #x" ++ RefString) %%WFC: Legal Character
    end;
parse_reference(?STRING_REST("#", Rest), State, _HaveToExist) ->
    {CharValue, RefString, Rest1, State1} = parse_digit(Rest, State, []),
    if
        ?is_char(CharValue) ->
            {{character, is_delimiter(CharValue), CharValue},
             Rest1, State1};
        true ->
            ?fatal_error(State1, "Not a legal character: #" ++ RefString)%%WFC: Legal Character
    end;
parse_reference(?STRING_UNBOUND_REST(C, Rest), State, HaveToExist) ->
    case is_name_start(C) of
        true ->
            {Name, Rest1, State1} = parse_name(Rest, State, [C]),
            parse_reference_1(Rest1, State1, HaveToExist, Name);
        false ->
            ?fatal_error(State, "name expected")
    end;
parse_reference(Bytes, State, HaveToExist) ->
    %% The second argument was the misspelled atom 'underfined'; every
    %% other call without an error text passes 'undefined'.
    unicode_incomplete_check([Bytes, State, HaveToExist, fun parse_reference/3],
                             undefined).


%% Second half of an entity reference: the name has been read and ";" must
%% follow. The name is resolved with look_up_reference/3; an
%% internal_general result is extended with a flag telling whether its
%% replacement text is a markup delimiter. Anything other than ";" is passed
%% to unicode_incomplete_check/2 with a "missing semicolon" message.
parse_reference_1(?STRING_REST(";", Tail), State, HaveToExist, Name) ->
    Resolved =
        case look_up_reference(Name, HaveToExist, State) of
            {internal_general, Name, Replacement} ->
                {internal_general, is_delimiter(Replacement), Name, Replacement};
            Unchanged ->
                Unchanged
        end,
    {Resolved, Tail, State};
parse_reference_1(Bytes, State, HaveToExist, Name) ->
    Retry = [Bytes, State, HaveToExist, Name, fun parse_reference_1/4],
    unicode_incomplete_check(Retry, "Missing semicolon after reference: " ++ Name).



%%----------------------------------------------------------------------
%% Function: is_delimiter(Character) -> Result
%% Parameters: Character = integer() | string()
%% Result    : true | false
%% Description: True when the argument is one of the characters
%%              & < > ' " given either as a code point or as a
%%              one-character string; false for everything else.
%%----------------------------------------------------------------------
is_delimiter(C) when C =:= $&; C =:= $<; C =:= $>; C =:= $'; C =:= $" ->
    true;
is_delimiter([C]) when C =:= $&; C =:= $<; C =:= $>; C =:= $'; C =:= $" ->
    true;
is_delimiter(_Other) ->
    false.

%%----------------------------------------------------------------------
%% Function: parse_pe_reference(Rest, State) -> Result
%% Parameters: Rest = string() | binary()  (input after the "%")
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Result, Rest, State}
%%             Result = result of look_up_reference/3 for "%" ++ Name
%% Description: Parse a parameter entity reference.
%%              [69] PEReference ::= '%' Name ';'
%%----------------------------------------------------------------------
parse_pe_reference(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_pe_reference/2);
parse_pe_reference(?STRING_UNBOUND_REST(C, Rest), State) ->
    case is_name_start(C) of
        true ->
            {Name, Rest1, State1} = parse_name(Rest, State, [C]),
            parse_pe_reference_1(Rest1, State1, Name);
        false ->
            ?fatal_error(State, "Name expected")
    end;
parse_pe_reference(Bytes, State) ->
    %% The second argument was the misspelled atom 'underfined'; every
    %% other call without an error text passes 'undefined'.
    unicode_incomplete_check([Bytes, State, fun parse_pe_reference/2],
                             undefined).


%% Second half of a parameter entity reference: expects ";" after the name
%% and looks the entity up under the key "%" ++ Name with HaveToExist set
%% to true. Anything other than ";" is passed to unicode_incomplete_check/2
%% with an error text.
parse_pe_reference_1(?STRING_REST(";", Tail), State, Name) ->
    {look_up_reference("%" ++ Name, true, State), Tail, State};
parse_pe_reference_1(Bytes, State, Name) ->
    Retry = [Bytes, State, Name, fun parse_pe_reference_1/3],
    unicode_incomplete_check(Retry, "missing ; after reference " ++ Name).



%%----------------------------------------------------------------------
%% Function: insert_reference(Reference, Table) -> Result
%% Parameters: Reference = {Name, Type, Value}
%%             Table = ETS table identifier
%% Result    : ok | true
%% Description: Stores an entity declaration unless the table already
%%              holds a three-element entry for Name, so the first
%%              declaration stays in effect. Returns ok when the entry
%%              was already there, otherwise the result of ets:insert/2.
%%----------------------------------------------------------------------
insert_reference({Name, _Type, _Value} = Entry, Table) ->
    Existing = ets:lookup(Table, Name),
    case Existing of
        [{Name, _, _}] ->
            ok;
        _ ->
            ets:insert(Table, Entry)
    end.
	    


%%----------------------------------------------------------------------
%% Function: look_up_reference(Name, HaveToExist, State) -> Result
%% Parameters: Name = string()
%%             HaveToExist = true | false
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Type, Name, Value} | {not_found, Name}
%% Description: Resolves an entity name. The five predefined entities
%%              (amp, lt, gt, apos, quot) are answered directly; other
%%              names are looked up in the table held in ref_table.
%%              An unknown name is a fatal error only when HaveToExist
%%              is true and the standalone field is yes; otherwise
%%              {not_found, Name} is returned.
%%----------------------------------------------------------------------
look_up_reference("amp", _, _) ->
    {internal_general, "amp", "&"};
look_up_reference("lt", _, _) ->
    {internal_general, "lt", "<"};
look_up_reference("gt", _, _) ->
    {internal_general, "gt", ">"};
look_up_reference("apos", _, _) ->
    {internal_general, "apos", "'"};
look_up_reference("quot", _, _) ->
    {internal_general, "quot", "\""};
look_up_reference(Name, HaveToExist, State) ->
    Table = State#xmerl_sax_parser_state.ref_table,
    case ets:lookup(Table, Name) of
        [{Name, Type, Value}] ->
            {Type, Name, Value};
        _ ->
            case HaveToExist of
                false ->
                    {not_found, Name};
                true ->
                    case State#xmerl_sax_parser_state.standalone of
                        no ->
                            {not_found, Name};  %%VC: Entity Declared
                        yes ->
                            ?fatal_error(State, "Entity not declared: " ++ Name) %%WFC: Entity Declared
                    end
            end
    end.


%%----------------------------------------------------------------------
%% Function: parse_hex(Rest, State, Acc) -> Result
%% Parameters: Rest = string() | binary()  (input after "&#x")
%%             State = #xmerl_sax_parser_state{}
%%             Acc = string()  (hex digits read so far, reversed)
%% Result    : {Value, Reference, Rest, State}
%%             Value = integer()
%%             Reference = string()  (the digits as written)
%% Description: Parse a hex reference. Reads hex digits up to the
%%              terminating ";" and converts them to an integer.
%%----------------------------------------------------------------------
parse_hex(?STRING_EMPTY, State, Acc) ->
    cf(?STRING_EMPTY, State, Acc, fun parse_hex/3);
%% "&#x;" has no digits: report it as a parse error instead of letting
%% list_to_integer/2 fail with badarg on the empty string.
parse_hex(?STRING_REST(";", _Rest), State, []) ->
    ?fatal_error(State, "Bad hex value in reference: no digits");
parse_hex(?STRING_REST(";", Rest), State, Acc) ->
    RefString = lists:reverse(Acc),
    {erlang:list_to_integer(RefString, 16), RefString, Rest, State};
parse_hex(?STRING_UNBOUND_REST(C, Rest), State, Acc) when ?is_hex_digit(C) ->
    parse_hex(Rest, State, [C |Acc]);
parse_hex(Bytes, State, Acc) ->
    unicode_incomplete_check([Bytes, State, Acc, fun parse_hex/3],
                             "Bad hex value in reference: ").


%%----------------------------------------------------------------------
%% Function: parse_digit(Rest, State, Acc) -> Result
%% Parameters: Rest = string() | binary()  (input after "&#")
%%             State = #xmerl_sax_parser_state{}
%%             Acc = string()  (digits read so far, reversed)
%% Result    : {Value, Reference, Rest, State}
%%             Value = integer()
%%             Reference = string()  (the digits as written)
%% Description: Parse a decimal reference. Reads decimal digits up to the
%%              terminating ";" and converts them to an integer.
%%----------------------------------------------------------------------
parse_digit(?STRING_EMPTY, State, Acc) ->
    cf(?STRING_EMPTY, State, Acc, fun parse_digit/3);
%% "&#;" has no digits: report it as a parse error instead of letting
%% list_to_integer/1 fail with badarg on the empty string.
parse_digit(?STRING_REST(";", _Rest), State, []) ->
    ?fatal_error(State, "Missing digits in character reference");
parse_digit(?STRING_REST(";", Rest), State, Acc) ->
    RefString = lists:reverse(Acc),
    {list_to_integer(RefString), RefString, Rest, State};
parse_digit(?STRING_UNBOUND_REST(C, Rest), State, Acc) ->
    case is_digit(C) of
        true ->
            parse_digit(Rest, State, [C |Acc]);
        false ->
            ?fatal_error(State, "Character in reference not a digit: " ++ [C])
    end;
parse_digit(Bytes, State, Acc) ->
    unicode_incomplete_check([Bytes, State, Acc, fun parse_digit/3],
                             undefined).

%%----------------------------------------------------------------------
%% Function: parse_system_litteral(Rest, State, Stop, Acc) -> Result
%% Parameters: Rest = string() | binary()  (input after the opening quote)
%%             State = #xmerl_sax_parser_state{}
%%             Stop = $' | $"
%%             Acc = string()  (characters read so far, reversed)
%% Result    : {Literal, Rest, State}
%%             Literal = string()
%% Description: Parse a system literal. Collects every character up to,
%%              but not including, the closing quote Stop.
%%              [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
%%----------------------------------------------------------------------
parse_system_litteral(?STRING_EMPTY, State, Stop, Acc) ->
    cf(?STRING_EMPTY, State, Stop, Acc, fun parse_system_litteral/4);
parse_system_litteral(?STRING_UNBOUND_REST(C, Tail), State, Stop, Acc) ->
    case C of
        Stop ->
            {lists:reverse(Acc), Tail, State};
        _ ->
            parse_system_litteral(Tail, State, Stop, [C | Acc])
    end;
parse_system_litteral(Bytes, State, Stop, Acc) ->
    Retry = [Bytes, State, Stop, Acc, fun parse_system_litteral/4],
    unicode_incomplete_check(Retry, undefined).

%%----------------------------------------------------------------------
%% Function: parse_pubid_litteral(Rest, State, Stop, Acc) -> Result
%% Parameters: Rest = string() | binary()  (input after the opening quote)
%%             State = #xmerl_sax_parser_state{}
%%             Stop = $' | $"
%%             Acc = string()  (characters read so far, reversed)
%% Result    : {Literal, Rest, State}
%%             Literal = string()
%% Description: Parse a public id literal. Collects characters up to the
%%              closing quote Stop; a character rejected by
%%              is_pubid_char/1 is a fatal error.
%%              [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
%%----------------------------------------------------------------------
parse_pubid_litteral(?STRING_EMPTY, State, Stop, Acc) ->
    cf(?STRING_EMPTY, State, Stop, Acc, fun parse_pubid_litteral/4);
parse_pubid_litteral(?STRING_UNBOUND_REST(C, Tail), State, Stop, Acc) when C =:= Stop ->
    {lists:reverse(Acc), Tail, State};
parse_pubid_litteral(?STRING_UNBOUND_REST(C, Tail), State, Stop, Acc) ->
    case is_pubid_char(C) of
        false ->
            ?fatal_error(State, "Character not allowed in pubid litteral: " ++ [C]);
        true ->
            parse_pubid_litteral(Tail, State, Stop, [C | Acc])
    end;
parse_pubid_litteral(Bytes, State, Stop, Acc) ->
    Retry = [Bytes, State, Stop, Acc, fun parse_pubid_litteral/4],
    unicode_incomplete_check(Retry, undefined).

%%======================================================================
%% DTD Parsing
%%======================================================================

%%----------------------------------------------------------------------
%% Function  : parse_doctype(Rest, State, Level, Acc) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             Level = integer()
%%             Acc = string()
%% Result    : {string(), Rest, State}
%% Description: This function is just searching the end of the doctype 
%%              declaration and doesn't parse it. It's used when the 
%%              parse_dtd option is set to skip.
%%----------------------------------------------------------------------
%% Just returns doctype as string
%% parse_doctype(?STRING_EMPTY, State, Level, Acc) ->
%%     cf(?STRING_EMPTY, State, Level, Acc, fun parse_doctype/4);
%% parse_doctype(?STRING("\r"), State, Level, Acc) ->
%%     cf(?STRING("\r"), State, Level, Acc, fun parse_doctype/4);
%% parse_doctype(?STRING_REST(">", Rest), State, 0, Acc) ->
%%     {Acc, Rest, State};
%% parse_doctype(?STRING_REST(">", Rest), State, Level, Acc) ->
%%     parse_doctype(Rest, State, Level-1, Acc);
%% parse_doctype(?STRING_REST("<", Rest), State, Level, Acc) ->
%%     parse_doctype(Rest, State, Level+1, [$<|Acc]);
%% parse_doctype(?STRING_REST("\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Level, Acc) ->
%%     parse_doctype(Rest, State#xmerl_sax_parser_state{line_no=N+1}, Level, [?lf |Acc]);
%% parse_doctype(?STRING_REST("\r\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Level, Acc) ->
%%     parse_doctype(Rest, State#xmerl_sax_parser_state{line_no=N+1}, Level, [?lf |Acc]);
%% parse_doctype(?STRING_REST("\r", Rest), #xmerl_sax_parser_state{line_no=N} = State, Level, Acc) ->
%%     parse_doctype(Rest, State#xmerl_sax_parser_state{line_no=N+1}, Level, [?lf |Acc]);
%% parse_doctype(?STRING_UNBOUND_REST(C, Rest), State, Level, Acc) ->
%%     parse_doctype(Rest, State, Level, [C|Acc]);
%% parse_doctype(Bytes, State, Level, Acc) ->
%%     unicode_incomplete_check([Bytes, State, Level, Acc, fun parse_doctype/4], 
%% 			     undefined).
    

%%----------------------------------------------------------------------
%% Function  : parse_doctype(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Rest, State}
%% Description: Starts parsing of the document type declaration:
%%              skips leading whitespace, reads the DTD name and hands
%%              over to parse_doctype_1/4.
%%              [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
%%                          ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
%%----------------------------------------------------------------------
parse_doctype(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_doctype/2);
parse_doctype(?STRING_UNBOUND_REST(C, _) = Bytes, State) when ?is_whitespace(C) ->
    {_Skipped, Tail, State1} = whitespace(Bytes, State, []),
    parse_doctype(Tail, State1);
parse_doctype(?STRING_UNBOUND_REST(C, Tail), State) ->
    case is_name_start(C) of
        false ->
            ?fatal_error(State, "expecting name or whitespace");
        true ->
            {DtdName, Tail1, State1} = parse_name(Tail, State, [C]),
            %% false: no external id has been seen for this DTD yet.
            parse_doctype_1(Tail1, State1, DtdName, false)
    end;
parse_doctype(Bytes, State) ->
    Retry = [Bytes, State, fun parse_doctype/2],
    unicode_incomplete_check(Retry, undefined).


%%----------------------------------------------------------------------
%% Function  : parse_doctype_1(Rest, State, Name, Definition) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             Name = string()
%%             Definition = true | false  (true once an external id has
%%                          been parsed and startDTD has been sent)
%% Result    : {Rest, State}
%% Description: Gets the DTD name as a parameter and continues parsing
%%              the DOCTYPE directive: an optional external id, an
%%              optional internal subset in "[...]" and the closing ">".
%%----------------------------------------------------------------------
parse_doctype_1(?STRING_EMPTY, State, Name, Definition) ->
    cf(?STRING_EMPTY, State, Name, Definition, fun parse_doctype_1/4);
parse_doctype_1(?STRING_REST(">", Tail), State, _, _) ->
    {Tail, State};
parse_doctype_1(?STRING_REST("[", Tail), State, Name, Definition) ->
    %% startDTD is sent here only when the external-id clause has not
    %% already done so.
    Started =
        case Definition of
            true ->
                State;
            false ->
                event_callback({startDTD, Name, "", ""}, State)
        end,
    {AfterDecl, DeclState} = parse_doctype_decl(Tail, Started),
    {_Skipped, AfterWs, WsState} = whitespace(AfterDecl, DeclState, []),
    parse_doctype_2(AfterWs, WsState);
parse_doctype_1(?STRING_UNBOUND_REST(C, _) = Bytes, State, Name, Definition) when ?is_whitespace(C) ->
    {_Skipped, Tail, State1} = whitespace(Bytes, State, []),
    parse_doctype_1(Tail, State1, Name, Definition);
%% "S" or "P" starts SYSTEM or PUBLIC.
parse_doctype_1(?STRING_UNBOUND_REST(C, _) = Bytes, State, Name, _Definition) when C == $S; C == $P ->
    {PubId, SysId, Tail, IdState} = parse_external_id(Bytes, State, false),
    Started = event_callback({startDTD, Name, PubId, SysId}, IdState),
    Loaded =
        case Started#xmerl_sax_parser_state.skip_external_dtd of
            true ->
                Started;
            false ->
                parse_external_entity(Started#xmerl_sax_parser_state{file_type = dtd},
                                      PubId, SysId)
        end,
    parse_doctype_1(Tail, Loaded, Name, true);
parse_doctype_1(Bytes, State, Name, Definition) ->
    Retry = [Bytes, State, Name, Definition, fun parse_doctype_1/4],
    unicode_incomplete_check(Retry, "expecting >, external id or declaration part").


%% Final step of the DOCTYPE declaration: after the internal subset and any
%% whitespace only ">" is accepted; everything else is handed to
%% unicode_incomplete_check/2 with the error text "expecting >".
parse_doctype_2(?STRING_REST(">", Tail), State) ->
    {Tail, State};
parse_doctype_2(Bytes, State) ->
    Retry = [Bytes, State, fun parse_doctype_2/2],
    unicode_incomplete_check(Retry, "expecting >").


%%----------------------------------------------------------------------
%% Function  : parse_external_entity(State, PubId, SysId) -> Result
%% Parameters: State = #xmerl_sax_parser_state{}
%%             PubId = string()
%%             SysId = string()
%% Result    : #xmerl_sax_parser_state{}
%% Description: Starts the parsing of an external entity by calling the resolver and
%%              then sends the input to the parsing function.
%%              A startEntity event is sent before and an endEntity event
%%              after the entity has been parsed. The returned state is
%%              the caller's state with the event state produced by the
%%              entity and file_type reset to normal.
%%----------------------------------------------------------------------
%% The public id is not handled
parse_external_entity(State, _PubId, SysId) ->

    ExtRef = check_uri(SysId, State#xmerl_sax_parser_state.current_location),

    SaveState =  event_callback({startEntity, SysId}, State),

    %% The entity is parsed with its own position and input source, but on
    %% top of SaveState, so whatever the startEntity callback changed is
    %% visible while parsing and survives in EventState. Building this
    %% from State dropped the callback's result.
    %% NOTE(review): assumes event_callback/2 only updates event-related
    %% fields of the state - confirm against its definition.
    State1 = SaveState#xmerl_sax_parser_state{line_no=1,
                                              continuation_state=undefined,
                                              continuation_fun=fun xmerl_sax_parser:default_continuation_cb/1,
                                              end_tags = []},


    EventState = handle_external_entity(ExtRef, State1),

    NewState =  event_callback({endEntity, SysId}, SaveState#xmerl_sax_parser_state{event_state=EventState}),
    NewState#xmerl_sax_parser_state{file_type=normal}.



%%----------------------------------------------------------------------
%% Function  : handle_external_entity(ExtRef, State) -> Result
%% Parameters: ExtRef = {file, string()} | {http, string()}
%%             State = #xmerl_sax_parser_state{}
%% Result    : the event_state field of the parser state reached after
%%             the entity has been parsed
%% Description: Opens the referenced entity (for http references the
%%              temporary file returned by http_get_file/3), parses it
%%              with parse_external_entity_1/2 using the file descriptor
%%              as continuation state, and returns the resulting event
%%              state. The file is closed, and the temporary file
%%              deleted, even when parsing fails. Other URI types are a
%%              fatal error.
%%----------------------------------------------------------------------
handle_external_entity({file, FileToOpen}, State) ->

    case file:open(FileToOpen, [raw, read, binary])  of
        {error, Reason} ->
            ?fatal_error(State, "Couldn't open external entity "++ FileToOpen ++ " : "
                         ++ file:format_error(Reason));
        {ok, FD} ->
            %% try ... after: a fatal error thrown by the parser must not
            %% leak the open file descriptor.
            try
                {?STRING_EMPTY, EntityState} =
                    parse_external_entity_1(<<>>,
                                            State#xmerl_sax_parser_state{continuation_state=FD,
                                                                         current_location=filename:dirname(FileToOpen),
                                                                         entity=filename:basename(FileToOpen)}),
                EntityState#xmerl_sax_parser_state.event_state
            after
                file:close(FD)
            end
    end;
handle_external_entity({http, Url}, State) ->

    try
        {Host, Port, Key} = http(Url),
        TmpFile = http_get_file(Host, Port, Key),
        case file:open(TmpFile, [raw, read, binary])  of
            {error, Reason} ->
                ?fatal_error(State, "Couldn't open temporary file " ++ TmpFile ++ " : "
                             ++ file:format_error(Reason));
            {ok, FD} ->
                %% Same clean-up guarantee as for local files, plus removal
                %% of the downloaded temporary file.
                try
                    {?STRING_EMPTY, EntityState} =
                        parse_external_entity_1(<<>>,
                                                State#xmerl_sax_parser_state{continuation_state=FD,
                                                                             current_location=filename:dirname(Url),
                                                                             entity=filename:basename(Url)}),
                    EntityState#xmerl_sax_parser_state.event_state
                after
                    file:close(FD),
                    file:delete(TmpFile)
                end
        end
    catch
        %% {error, _} throws are turned into parser fatal errors.
        throw:{error, Error} ->
            ?fatal_error(State, Error)
    end;
handle_external_entity({Tag, _Url}, State) ->
    ?fatal_error(State, "Unsupported URI type: " ++ atom_to_list(Tag)).

%%----------------------------------------------------------------------
%% Function  : parse_external_entity_1(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Rest, State}
%% Description: Parse the external entity. A leading byte order mark and
%%              an optional "<?xml ...?>" declaration are consumed, then
%%              the input is parsed as DTD declarations (file_type dtd
%%              or entity) or as content (file_type normal). Running out
%%              of input ends a dtd/entity parse normally.
%%----------------------------------------------------------------------
parse_external_entity_1(?STRING_EMPTY, #xmerl_sax_parser_state{file_type=Type} = State) ->
    %% The old-style catch is kept on purpose: both normal returns and
    %% thrown fatal errors of the continuation are inspected below.
    case catch cf(?STRING_EMPTY, State, fun parse_external_entity_1/2) of
        {Rest, State1} when is_record(State1, xmerl_sax_parser_state) ->
            %% Return the state produced by the parse. The incoming State
            %% was returned here before, which discarded everything the
            %% entity's events had changed.
            {Rest, State1};
        {fatal_error, {State1, "No more bytes"}} when Type == dtd; Type == entity ->
            {?STRING_EMPTY, State1};
        Other ->
            throw(Other)
    end;
%% Partial byte order marks: fetch more input before deciding.
parse_external_entity_1(?BYTE_ORDER_MARK_1, State) ->
    cf(?BYTE_ORDER_MARK_1, State, fun parse_external_entity_1/2);
parse_external_entity_1(?BYTE_ORDER_MARK_2, State) ->
    cf(?BYTE_ORDER_MARK_2, State, fun parse_external_entity_1/2);
parse_external_entity_1(?BYTE_ORDER_MARK_REST(Rest), State) ->
    parse_external_entity_1(Rest, State);
%% Proper prefixes of "<?xml": fetch more input before deciding.
parse_external_entity_1(?STRING("<") = Bytes, State) ->
    cf(Bytes, State, fun parse_external_entity_1/2);
parse_external_entity_1(?STRING("<?") = Bytes, State) ->
    cf(Bytes, State, fun parse_external_entity_1/2);
parse_external_entity_1(?STRING("<?x") = Bytes, State) ->
    cf(Bytes, State, fun parse_external_entity_1/2);
parse_external_entity_1(?STRING("<?xm") = Bytes, State) ->
    cf(Bytes, State, fun parse_external_entity_1/2);
parse_external_entity_1(?STRING("<?xml") = Bytes, State) ->
    cf(Bytes, State, fun parse_external_entity_1/2);
parse_external_entity_1(?STRING_REST("<?xml", Rest) = Bytes,
                        #xmerl_sax_parser_state{file_type=Type} = State) ->
    {Rest1, State1} =
        case is_next_char_whitespace(Rest, State) of
            false ->
                %% "<?xml" followed by a non-whitespace character is not
                %% an XML declaration; parse the input unchanged.
                {Bytes, State};
            true ->
                {_XmlAttributes, R, S} = parse_version_info(Rest, State, []),
                %S1 =  event_callback({processingInstruction, "xml", XmlAttributes}, S),% The XML decl. should not be reported as a PI
                {R, S}
        end,
    case Type of
        dtd ->
            case catch parse_doctype_decl(Rest1, State1)  of
                {Rest2, State2} when is_record(State2, xmerl_sax_parser_state) ->
                    {Rest2, State2};
                {fatal_error, {State2, "No more bytes"}} ->
                    {?STRING_EMPTY, State2};
                Other ->
                    throw(Other)
            end;

        _ -> % Type is normal or entity
            parse_content(Rest1, State1, [], true)
    end;
parse_external_entity_1(?STRING_UNBOUND_REST(_C, _) = Bytes,
                        #xmerl_sax_parser_state{file_type=Type} = State) ->
    case Type of
        normal ->
            parse_content(Bytes, State, [], true);
        dtd ->
            parse_doctype_decl(Bytes, State);
        entity ->
            parse_doctype_decl(Bytes, State)    end;
parse_external_entity_1(Bytes, State) ->
    unicode_incomplete_check([Bytes, State, fun parse_external_entity_1/2],
                             undefined).

%%----------------------------------------------------------------------
%% Function  : is_next_char_whitespace(Bytes, State) -> Result
%% Parameters: Bytes = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : true | false
%% Description: Tells whether the first character of Bytes is whitespace.
%%              Input whose first character cannot be decoded (including
%%              an empty buffer) is passed to unicode_incomplete_check/2.
%%----------------------------------------------------------------------
is_next_char_whitespace(?STRING_UNBOUND_REST(C, _), _State) ->
    if
        ?is_whitespace(C) -> true;
        true -> false
    end;
is_next_char_whitespace(Bytes, State) ->
    Retry = [Bytes, State, fun is_next_char_whitespace/2],
    unicode_incomplete_check(Retry, undefined).

%%----------------------------------------------------------------------
%% Function  : parse_external_id(Rest, State, OptionalSystemId) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             OptionalSystemId = true | false
%% Result    : {PubId, SysId, Rest, State}
%%             PubId = string()
%%             SysId = string()
%% Description: Parse an external id. After "SYSTEM" a system id is read
%%              and PubId is returned as ""; after "PUBLIC" the work is
%%              delegated to parse_public_id/3 together with
%%              OptionalSystemId. A buffer holding only a proper prefix
%%              of either keyword triggers a request for more input.
%%              [75] ExternalID ::= 'SYSTEM' S SystemLiteral
%%             		          | 'PUBLIC' S PubidLiteral S SystemLiteral
%%----------------------------------------------------------------------
parse_external_id(?STRING_EMPTY, State, Opt) ->
    cf(?STRING_EMPTY, State, Opt, fun parse_external_id/3);
%% --- "SYSTEM" and its proper prefixes ---
parse_external_id(?STRING("S") = Partial, State, Opt) ->
    cf(Partial, State, Opt, fun parse_external_id/3);
parse_external_id(?STRING("SY") = Partial, State, Opt) ->
    cf(Partial, State, Opt, fun parse_external_id/3);
parse_external_id(?STRING("SYS") = Partial, State, Opt) ->
    cf(Partial, State, Opt, fun parse_external_id/3);
parse_external_id(?STRING("SYST") = Partial, State, Opt) ->
    cf(Partial, State, Opt, fun parse_external_id/3);
parse_external_id(?STRING("SYSTE") = Partial, State, Opt) ->
    cf(Partial, State, Opt, fun parse_external_id/3);
parse_external_id(?STRING_REST("SYSTEM", Tail), State, _Opt) ->
    {SysId, Tail1, State1} = parse_system_id(Tail, State, false),
    {"", SysId, Tail1, State1};
%% --- "PUBLIC" and its proper prefixes ---
parse_external_id(?STRING("P") = Partial, State, Opt) ->
    cf(Partial, State, Opt, fun parse_external_id/3);
parse_external_id(?STRING("PU") = Partial, State, Opt) ->
    cf(Partial, State, Opt, fun parse_external_id/3);
parse_external_id(?STRING("PUB") = Partial, State, Opt) ->
    cf(Partial, State, Opt, fun parse_external_id/3);
parse_external_id(?STRING("PUBL") = Partial, State, Opt) ->
    cf(Partial, State, Opt, fun parse_external_id/3);
parse_external_id(?STRING("PUBLI") = Partial, State, Opt) ->
    cf(Partial, State, Opt, fun parse_external_id/3);
parse_external_id(?STRING_REST("PUBLIC", Tail), State, Opt) ->
    parse_public_id(Tail, State, Opt);
parse_external_id(Bytes, State, Opt) ->
    Retry = [Bytes, State, Opt, fun parse_external_id/3],
    unicode_incomplete_check(Retry, "expecting SYSTEM or PUBLIC").


%%----------------------------------------------------------------------
%% Function  : parse_system_id(Rest, State, OptionalSystemId) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             OptionalSystemId = true | false
%% Result    : {SysId, Rest, State}
%%             SysId = string()
%% Description: Parses the whitespace in front of a system id and then
%%              the system literal itself. When OptionalSystemId is
%%              true and the input does not start with whitespace, ""
%%              is returned and nothing is consumed.
%%----------------------------------------------------------------------
parse_system_id(?STRING_UNBOUND_REST(Char, _) = Input, State, SysIdOptional)
  when ?is_whitespace(Char) ->
    {_WS, AfterWs, WsState} = whitespace(Input, State, []),
    check_system_litteral(AfterWs, WsState, SysIdOptional);
parse_system_id(?STRING_UNBOUND_REST(_Char, _) = Input, State, true) ->
    %% No separating whitespace, so there is no system id to read.
    {"", Input, State};
parse_system_id(Input, State, SysIdOptional) ->
    unicode_incomplete_check([Input, State, SysIdOptional, fun parse_system_id/3],
                             "whitespace expected").

%% Expects the opening quote of a system literal and parses the literal
%% up to the matching quote character. When the system id is optional
%% (third argument true) and the next character is not a quote, "" is
%% returned and the input is left untouched.
check_system_litteral(?STRING_UNBOUND_REST(Quote, Tail), State, _SysIdOptional)
  when Quote == $'; Quote == $" ->
    parse_system_litteral(Tail, State, Quote, []);
check_system_litteral(?STRING_UNBOUND_REST(_Char, _) = Input, State, true) ->
    {"", Input, State};
check_system_litteral(Input, State, SysIdOptional) ->
    unicode_incomplete_check([Input, State, SysIdOptional, fun check_system_litteral/3],
                             "\" or \' expected").


%%----------------------------------------------------------------------
%% Function  : parse_public_id(Rest, State, OptionalSystemId) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             OptionalSystemId = true | false
%% Result    : {PubId, SysId, Rest, State}
%%             PubId = string()
%%             SysId = string()
%% Description: Parses the mandatory whitespace that follows 'PUBLIC'
%%              and continues with the public literal. The
%%              OptionalSystemId flag is passed on and controls whether
%%              the system id after the public id may be omitted.
%%----------------------------------------------------------------------
parse_public_id(?STRING_UNBOUND_REST(Char, _) = Input, State, SysIdOptional)
  when ?is_whitespace(Char) ->
    {_WS, AfterWs, WsState} = whitespace(Input, State, []),
    check_public_litteral(AfterWs, WsState, SysIdOptional);
parse_public_id(Input, State, SysIdOptional) ->
    unicode_incomplete_check([Input, State, SysIdOptional, fun parse_public_id/3],
                             "whitespace expected").


%% Expects the opening quote of a public literal, parses the literal up
%% to the matching quote and then parses the system id that follows it.
%% Returns {PubId, SysId, Rest, State}.
check_public_litteral(?STRING_UNBOUND_REST(Quote, Tail), State, SysIdOptional)
  when Quote == $'; Quote == $" ->
    {PubId, AfterPubId, PubState} = parse_pubid_litteral(Tail, State, Quote, []),
    {SysId, AfterSysId, SysState} = parse_system_id(AfterPubId, PubState, SysIdOptional),
    {PubId, SysId, AfterSysId, SysState};
check_public_litteral(Input, State, SysIdOptional) ->
    unicode_incomplete_check([Input, State, SysIdOptional, fun check_public_litteral/3],
                             "\" or \' expected").


%%----------------------------------------------------------------------
%% Function  : parse_doctype_decl(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Rest, State}
%% Description: Parses the markup declarations of a DOCTYPE. Processing
%%              instructions, parameter entity references, '<!'
%%              declarations and whitespace are handled one after the
%%              other until a ']' is found; the ']' is consumed.
%%              [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl
%%                                | NotationDecl | PI | Comment
%%----------------------------------------------------------------------
parse_doctype_decl(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_doctype_decl/2);
parse_doctype_decl(?STRING("<") = Partial, State) ->
    %% A lone '<' cannot yet be told apart as '<?' or '<!'.
    cf(Partial, State, fun parse_doctype_decl/2);
parse_doctype_decl(?STRING_REST("<?", Tail), State) ->
    {AfterPI, PIState} = parse_pi(Tail, State),
    parse_doctype_decl(AfterPI, PIState);
parse_doctype_decl(?STRING_REST("%", Tail), State) ->
    {PERef, AfterRef, RefState} = parse_pe_reference(Tail, State),
    case PERef of
        {internal_parameter, _, Replacement} ->
            %% The replacement text is padded with one space on each
            %% side and pushed back in front of the remaining input.
            Padded = ?TO_INPUT_FORMAT(" " ++ Replacement ++ " "),
            parse_doctype_decl(?APPEND_STRING(Padded, AfterRef), RefState);
        {external_parameter, _, {PubId, SysId}} ->
            EntityState = RefState#xmerl_sax_parser_state{file_type = entity},
            ParsedState = parse_external_entity(EntityState, PubId, SysId),
            parse_doctype_decl(AfterRef, ParsedState);
        {not_found, PEName} ->
            %% An undeclared parameter entity is tolerated only when
            %% skip_external_dtd is set.
            case State#xmerl_sax_parser_state.skip_external_dtd of
                false ->
                    ?fatal_error(RefState, "Entity not declared: " ++ PEName); %%WFC: Entity Declared
                true ->
                    parse_doctype_decl(AfterRef, RefState)
            end
    end;
parse_doctype_decl(?STRING_REST("<!", Tail), State) ->
    parse_doctype_decl_1(Tail, State);
parse_doctype_decl(?STRING_REST("]", Tail), State) ->
    {Tail, State};
parse_doctype_decl(?STRING_UNBOUND_REST(Char, _) = Input, State)
  when ?is_whitespace(Char) ->
    {_WS, AfterWs, WsState} = whitespace(Input, State, []),
    parse_doctype_decl(AfterWs, WsState);
parse_doctype_decl(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_doctype_decl/2],
                             "expecting ELEMENT, ATTLIST, ENTITY, NOTATION or comment").


%%----------------------------------------------------------------------
%% Function  : parse_doctype_decl_1(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Rest, State}
%% Description: Dispatches on the keyword that follows '<!' inside a
%%              DOCTYPE: ELEMENT, ATTLIST, ENTITY, NOTATION or a '--'
%%              comment. Input that stops inside one of these keywords
%%              is handed to cf/3. After each declaration, parsing goes
%%              on in parse_doctype_decl/2.
%%----------------------------------------------------------------------
parse_doctype_decl_1(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_doctype_decl_1/2);

%% '<!ELEMENT'
parse_doctype_decl_1(?STRING("E") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING("EL") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING("ELE") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING("ELEM") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING("ELEME") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING("ELEMEN") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING_REST("ELEMENT", Tail), State) ->
    {AfterDecl, DeclState} = parse_element_decl(Tail, State),
    parse_doctype_decl(AfterDecl, DeclState);

%% '<!ATTLIST'
parse_doctype_decl_1(?STRING("A") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING("AT") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING("ATT") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING("ATTL") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING("ATTLI") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING("ATTLIS") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING_REST("ATTLIST", Tail), State) ->
    {AfterDecl, DeclState} = parse_att_list_decl(Tail, State),
    parse_doctype_decl(AfterDecl, DeclState);

%% '<!ENTITY' -- the single "E" prefix is already covered by the
%% ELEMENT clauses above.
parse_doctype_decl_1(?STRING("EN") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING("ENT") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING("ENTI") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING("ENTIT") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING_REST("ENTITY", Tail), State) ->
    {AfterDecl, DeclState} = parse_entity_decl(Tail, State),
    parse_doctype_decl(AfterDecl, DeclState);

%% '<!NOTATION'
parse_doctype_decl_1(?STRING("N") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING("NO") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING("NOT") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING("NOTA") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING("NOTAT") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING("NOTATI") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING("NOTATIO") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING_REST("NOTATION", Tail), State) ->
    {AfterDecl, DeclState} = parse_notation_decl(Tail, State),
    parse_doctype_decl(AfterDecl, DeclState);

%% '<!--' comment
parse_doctype_decl_1(?STRING("-") = Partial, State) ->
    cf(Partial, State, fun parse_doctype_decl_1/2);
parse_doctype_decl_1(?STRING_REST("--", Tail), State) ->
    {AfterComment, CommentState} = parse_comment(Tail, State, []),
    parse_doctype_decl(AfterComment, CommentState);

parse_doctype_decl_1(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_doctype_decl_1/2],
                             "expecting ELEMENT, ATTLIST, ENTITY, NOTATION or comment").


%%----------------------------------------------------------------------
%% Function  : parse_element_decl(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Rest, State}
%% Description: Parses what follows the keyword '<!ELEMENT'. The
%%              mandatory whitespace is skipped here; the name and the
%%              content specification are handled by
%%              parse_element_decl_1/2.
%%              [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
%%----------------------------------------------------------------------
parse_element_decl(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_element_decl/2);
parse_element_decl(?STRING_UNBOUND_REST(Char, _) = Input, State)
  when ?is_whitespace(Char) ->
    {_WS, AfterWs, WsState} = whitespace(Input, State, []),
    parse_element_decl_1(AfterWs, WsState);
parse_element_decl(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_element_decl/2],
                             "whitespace expected").

%% Parses the element name and its content specification, then emits an
%% {elementDecl, Name, Model} event. Returns {Rest, State}. A fatal
%% error is raised when the first character cannot start a name.
parse_element_decl_1(?STRING_UNBOUND_REST(First, Tail), State) ->
    case is_name_start(First) of
        false ->
            ?fatal_error(State, "name expected");
        true ->
            {ElemName, AfterName, NameState} = parse_name(Tail, State, [First]),
            {ContentModel, AfterModel, ModelState} =
                parse_element_content(AfterName, NameState),
            EventState = event_callback({elementDecl, ElemName, ContentModel},
                                        ModelState),
            {AfterModel, EventState}
    end;
parse_element_decl_1(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_element_decl_1/2],
                             undefined).


%%----------------------------------------------------------------------
%% Function  : parse_element_content(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Content, Rest, State}
%%             Content = string()
%% Description: Skips the whitespace in front of the content
%%              specification of an element declaration and collects
%%              the specification with parse_element_content_1/3.
%%              [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
%%----------------------------------------------------------------------
parse_element_content(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_element_content/2);
parse_element_content(?STRING_UNBOUND_REST(Char, _) = Input, State)
  when ?is_whitespace(Char) ->
    {_WS, AfterWs, WsState} = whitespace(Input, State, []),
    parse_element_content_1(AfterWs, WsState, []);
parse_element_content(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_element_content/2],
                             "whitespace expected").


%%----------------------------------------------------------------------
%% Function  : parse_element_content_1(Rest, State, Acc) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             Acc = string()
%% Result    : {Content, Rest, State}
%%             Content = string()
%% Description: Collects every character up to the closing '>' of an
%%              element declaration. The '>' is consumed, and
%%              whitespace directly in front of it is not part of the
%%              returned content.
%%----------------------------------------------------------------------
parse_element_content_1(?STRING_EMPTY, State, Collected) ->
    cf(?STRING_EMPTY, State, Collected, fun parse_element_content_1/3);
parse_element_content_1(?STRING_REST(">", Tail), State, Collected) ->
    %% Collected is in reverse order, so its leading whitespace is the
    %% whitespace that preceded '>'.
    Trimmed = delete_leading_whitespace(Collected),
    {lists:reverse(Trimmed), Tail, State};
parse_element_content_1(?STRING_UNBOUND_REST(Char, Tail), State, Collected) ->
    parse_element_content_1(Tail, State, [Char | Collected]);
parse_element_content_1(Input, State, Collected) ->
    unicode_incomplete_check([Input, State, Collected, fun parse_element_content_1/3],
                             undefined).

%% Removes whitespace characters from the front of a list and returns
%% the remainder, starting at the first non-whitespace element.
delete_leading_whitespace([Char | Remaining]) when ?is_whitespace(Char) ->
    delete_leading_whitespace(Remaining);
delete_leading_whitespace(Chars) ->
    Chars.
								   
%%----------------------------------------------------------------------
%% Function  : parse_att_list_decl(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Rest, State}
%% Description: Parses what follows the keyword '<!ATTLIST'. The
%%              mandatory whitespace is skipped here; the element name
%%              and the attribute definitions are handled by
%%              parse_att_list_decl_1/2.
%%              [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
%%----------------------------------------------------------------------
parse_att_list_decl(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_att_list_decl/2);
parse_att_list_decl(?STRING_UNBOUND_REST(Char, _) = Input, State)
  when ?is_whitespace(Char) ->
    {_WS, AfterWs, WsState} = whitespace(Input, State, []),
    parse_att_list_decl_1(AfterWs, WsState);
parse_att_list_decl(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_att_list_decl/2],
                             "whitespace expected").


%% Parses the element name of an ATTLIST declaration and hands over to
%% parse_att_defs/3 for the attribute definitions. A fatal error is
%% raised when the first character cannot start a name.
parse_att_list_decl_1(?STRING_UNBOUND_REST(First, Tail), State) ->
    case is_name_start(First) of
        false ->
            ?fatal_error(State, "name expected");
        true ->
            {ElementName, AfterName, NameState} = parse_name(Tail, State, [First]),
            parse_att_defs(AfterName, NameState, ElementName)
    end;
parse_att_list_decl_1(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_att_list_decl_1/2],
                             undefined).


%%----------------------------------------------------------------------
%% Function  : parse_att_defs(Rest, State, ElementName) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             ElementName = string()
%% Result    : {Rest, State}
%% Description: Parses the attribute definitions of an ATTLIST
%%              declaration until the closing '>', which is consumed.
%%              For every definition an
%%              {attributeDecl, ElementName, AttrName, Type, Mode, Value}
%%              event is emitted.
%%              [53] AttDef ::= S Name S AttType S DefaultDecl
%%----------------------------------------------------------------------
parse_att_defs(?STRING_EMPTY, State, ElementName) ->
    cf(?STRING_EMPTY, State, ElementName, fun parse_att_defs/3);
parse_att_defs(?STRING_REST(">", Tail), State, _ElementName) ->
    {Tail, State};
parse_att_defs(?STRING_UNBOUND_REST(Char, _) = Input, State, ElementName)
  when ?is_whitespace(Char) ->
    {_WS, AfterWs, WsState} = whitespace(Input, State, []),
    parse_att_defs(AfterWs, WsState, ElementName);
parse_att_defs(?STRING_UNBOUND_REST(First, Tail), State, ElementName) ->
    case is_name_start(First) of
        false ->
            ?fatal_error(State, "whitespace or name expected");
        true ->
            {AttrName, AfterName, NameState} = parse_name(Tail, State, [First]),
            {Type, AfterType, TypeState} = parse_att_type(AfterName, NameState),
            {Mode, Value, AfterDefault, DefaultState} =
                parse_default_decl(AfterType, TypeState),
            Event = {attributeDecl, ElementName, AttrName, Type, Mode, Value},
            EventState = event_callback(Event, DefaultState),
            parse_att_defs(AfterDefault, EventState, ElementName)
    end;
parse_att_defs(Input, State, ElementName) ->
    unicode_incomplete_check([Input, State, ElementName, fun parse_att_defs/3],
                             undefined).


%%----------------------------------------------------------------------
%% Function  : parse_att_type(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Type, Rest, State}
%%             Type = string()
%% Description: Parses the whitespace in front of an attribute type and
%%              the type itself. For an enumeration or a NOTATION type
%%              the returned string holds everything up to and
%%              including the closing ')'. Any other type must be one
%%              accepted by check_att_type/1, otherwise a fatal error
%%              is raised.
%%              [54] AttType ::= StringType | TokenizedType | EnumeratedType
%%              [55] StringType  ::= 'CDATA'
%%              [56] TokenizedType  ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY'
%%                                    | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
%%              [57] EnumeratedType ::= NotationType | Enumeration
%%              [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
%%              [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
%%----------------------------------------------------------------------
parse_att_type(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_att_type/2);
parse_att_type(?STRING_UNBOUND_REST(Char, _) = Input, State)
  when ?is_whitespace(Char) ->
    {_WS, AfterWs, WsState} = whitespace(Input, State, []),
    case parse_att_type_1(AfterWs, WsState, []) of
        {Kind, AfterKind, KindState} when Kind == "NOTATION"; Kind == "(" ->
            %% Enumerated types: append the text up to the closing ')'.
            {EnumTail, AfterEnum, EnumState} =
                parse_until_right_paren(AfterKind, KindState, []),
            {Kind ++ EnumTail, AfterEnum, EnumState};
        {Kind, AfterKind, KindState} ->
            case check_att_type(Kind) of
                false ->
                    ?fatal_error(KindState, "wrong attribute type");
                true ->
                    {Kind, AfterKind, KindState}
            end
    end;
parse_att_type(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_att_type/2],
                             "whitespace expected").


%%----------------------------------------------------------------------
%% Function  : parse_att_type_1(Rest, State, Acc) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             Acc = string()
%% Result    : {Type, Rest, State}
%%             Type = string()
%% Description: Collects the characters of an attribute type keyword up
%%              to the next whitespace character, which is left in
%%              Rest. A '(' found before anything has been collected is
%%              consumed and returned as the type "(".
%%----------------------------------------------------------------------
parse_att_type_1(?STRING_EMPTY, State, Collected) ->
    cf(?STRING_EMPTY, State, Collected, fun parse_att_type_1/3);
parse_att_type_1(?STRING_UNBOUND_REST(Char, _) = Input, State, Collected)
  when ?is_whitespace(Char) ->
    {lists:reverse(Collected), Input, State};
parse_att_type_1(?STRING_REST("(", Tail), State, []) ->
    {"(", Tail, State};
parse_att_type_1(?STRING_UNBOUND_REST(Char, Tail), State, Collected) ->
    parse_att_type_1(Tail, State, [Char | Collected]);
parse_att_type_1(Input, State, Collected) ->
    unicode_incomplete_check([Input, State, Collected, fun parse_att_type_1/3],
                             undefined).

%%----------------------------------------------------------------------
%% Function  : check_att_type(Type) -> Result
%% Parameters: Type = string()
%% Result    : true | false
%% Description: Tells whether Type is one of the string or tokenized
%%              attribute type keywords: CDATA, ID, IDREF, IDREFS,
%%              ENTITY, ENTITIES, NMTOKEN or NMTOKENS. The comparison
%%              is exact and case sensitive.
%%----------------------------------------------------------------------
check_att_type(Type) ->
    lists:member(Type, ["CDATA",
                        "ID", "IDREF", "IDREFS",
                        "ENTITY", "ENTITIES",
                        "NMTOKEN", "NMTOKENS"]).


%%----------------------------------------------------------------------
%% Function  : parse_until_right_paren(Rest, State, Acc) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             Acc = string()
%% Result    : {Type, Rest, State}
%%             Type = string()
%% Description: Collects the text of an enumerated type up to and
%%              including the first ')'. The ')' is consumed from the
%%              input and is the last character of the returned string.
%%----------------------------------------------------------------------
parse_until_right_paren(?STRING_EMPTY, State, Collected) ->
    cf(?STRING_EMPTY, State, Collected, fun parse_until_right_paren/3);
parse_until_right_paren(?STRING_REST(")", Tail), State, Collected) ->
    {lists:reverse([$) | Collected]), Tail, State};
parse_until_right_paren(?STRING_UNBOUND_REST(Char, Tail), State, Collected) ->
    parse_until_right_paren(Tail, State, [Char | Collected]);
parse_until_right_paren(Input, State, Collected) ->
    unicode_incomplete_check([Input, State, Collected, fun parse_until_right_paren/3],
                             undefined).


%%----------------------------------------------------------------------
%% Function  : parse_default_decl(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Mode, Value, Rest, State}
%%             Mode = string()
%%             Value = string() | undefined
%% Description: Skips the mandatory whitespace in front of a default
%%              declaration and parses the declaration itself with
%%              parse_default_decl_1/2.
%%              [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
%%----------------------------------------------------------------------
parse_default_decl(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_default_decl/2);
parse_default_decl(?STRING_UNBOUND_REST(Char, _) = Input, State)
  when ?is_whitespace(Char) ->
    {_WS, AfterWs, WsState} = whitespace(Input, State, []),
    parse_default_decl_1(AfterWs, WsState);
parse_default_decl(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_default_decl/2],
                             "whitespace expected").


%%----------------------------------------------------------------------
%% Function  : parse_default_decl_1(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Mode, Value, Rest, State}
%%             Mode = "#REQUIRED" | "#IMPLIED" | "#FIXED" | ""
%%             Value = string() | undefined
%% Description: Parses a default declaration. Value is undefined for
%%              '#REQUIRED' and '#IMPLIED'; Mode is "" when only a
%%              quoted attribute value is given. Input that stops
%%              inside one of the keywords, including directly after
%%              the '#', is handed to cf/3 instead of being rejected.
%%              [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
%%----------------------------------------------------------------------
parse_default_decl_1(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_default_decl_1/2);
parse_default_decl_1(?STRING_REST("#", _Rest) = Bytes, State) ->
    case Bytes of
        %% A lone '#' is a valid prefix of every keyword below. Without
        %% this clause, input ending right after '#' fell through to
        %% the catch-all branch and raised a fatal error.
        ?STRING("#") ->
            cf(Bytes, State, fun parse_default_decl_1/2);

        ?STRING("#R") ->
            cf(Bytes, State, fun parse_default_decl_1/2);
        ?STRING("#RE") ->
            cf(Bytes, State, fun parse_default_decl_1/2);
        ?STRING("#REQ") ->
            cf(Bytes, State, fun parse_default_decl_1/2);
        ?STRING("#REQU") ->
            cf(Bytes, State, fun parse_default_decl_1/2);
        ?STRING("#REQUI") ->
            cf(Bytes, State, fun parse_default_decl_1/2);
        ?STRING("#REQUIR") ->
            cf(Bytes, State, fun parse_default_decl_1/2);
        ?STRING("#REQUIRE") ->
            cf(Bytes, State, fun parse_default_decl_1/2);
        ?STRING_REST("#REQUIRED", Rest1) ->
            {"#REQUIRED", undefined, Rest1, State};

        ?STRING("#I") ->
            cf(Bytes, State, fun parse_default_decl_1/2);
        ?STRING("#IM") ->
            cf(Bytes, State, fun parse_default_decl_1/2);
        ?STRING("#IMP") ->
            cf(Bytes, State, fun parse_default_decl_1/2);
        ?STRING("#IMPL") ->
            cf(Bytes, State, fun parse_default_decl_1/2);
        ?STRING("#IMPLI") ->
            cf(Bytes, State, fun parse_default_decl_1/2);
        ?STRING("#IMPLIE") ->
            cf(Bytes, State, fun parse_default_decl_1/2);
        ?STRING_REST("#IMPLIED", Rest1) ->
            {"#IMPLIED", undefined, Rest1, State};

        ?STRING("#F") ->
            cf(Bytes, State, fun parse_default_decl_1/2);
        ?STRING("#FI") ->
            cf(Bytes, State, fun parse_default_decl_1/2);
        ?STRING("#FIX") ->
            cf(Bytes, State, fun parse_default_decl_1/2);
        ?STRING("#FIXE") ->
            cf(Bytes, State, fun parse_default_decl_1/2);
        ?STRING_REST("#FIXED", Rest1) ->
            parse_fixed(Rest1, State);
        _ ->
            ?fatal_error(State, "REQUIRED, IMPLIED or FIXED expected after #")
    end;
parse_default_decl_1(?STRING_UNBOUND_REST(C, Rest), State) when C == $'; C == $" ->
    %% A bare attribute value: the opening quote C is also the stop character.
    {DefaultValue, Rest1, State1} = parse_att_value(Rest, State, C, []),
    {"", DefaultValue, Rest1, State1};
parse_default_decl_1(Bytes, State) ->
    unicode_incomplete_check([Bytes, State, fun parse_default_decl_1/2],
                             "bad default declaration").


%% Parses the attribute value that follows the keyword '#FIXED'. The
%% keyword must be followed by whitespace. Returns
%% {"#FIXED", Value, Rest, State}.
parse_fixed(?STRING_UNBOUND_REST(Char, _) = Input, State)
  when ?is_whitespace(Char) ->
    %% parse_att_value removes leading WS
    {FixedValue, AfterValue, ValueState} = parse_att_value(Input, State),
    {"#FIXED", FixedValue, AfterValue, ValueState};
parse_fixed(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_fixed/2],
                             "whitespace expected").

%%----------------------------------------------------------------------
%% Function  : parse_entity_decl(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Rest, State}
%% Description: Parses what follows the keyword '<!ENTITY'. The
%%              mandatory whitespace is skipped here; the rest of the
%%              declaration is handled by parse_entity_decl_1/2.
%%              [70] EntityDecl ::= GEDecl | PEDecl
%%              [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
%%              [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
%%----------------------------------------------------------------------
parse_entity_decl(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_entity_decl/2);
parse_entity_decl(?STRING_UNBOUND_REST(Char, _) = Input, State)
  when ?is_whitespace(Char) ->
    {_WS, AfterWs, WsState} = whitespace(Input, State, []),
    parse_entity_decl_1(AfterWs, WsState);
parse_entity_decl(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_entity_decl/2],
                             "whitespace expected").


%%----------------------------------------------------------------------
%% Function  : parse_entity_decl_1(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Rest, State}
%% Description: Tells a parameter entity declaration (introduced by
%%              '%') from a general entity declaration (introduced by
%%              a name). In both cases the introducing token must be
%%              followed by whitespace, otherwise a fatal error is
%%              raised.
%%----------------------------------------------------------------------
parse_entity_decl_1(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_entity_decl_1/2);
parse_entity_decl_1(?STRING_REST("%", Tail), State) ->
    case is_next_char_whitespace(Tail, State) of
        false ->
            ?fatal_error(State, "whitespace expected");
        true ->
            {_WS, AfterWs, WsState} = whitespace(Tail, State, []),
            parse_pe_name(AfterWs, WsState)
    end;
parse_entity_decl_1(?STRING_UNBOUND_REST(First, Tail), State) ->
    case is_name_start(First) of
        false ->
            ?fatal_error(State, "name or % expected");
        true ->
            {EntityName, AfterName, NameState} = parse_name(Tail, State, [First]),
            case is_next_char_whitespace(AfterName, NameState) of
                false ->
                    ?fatal_error(NameState, "whitespace expected");
                true ->
                    {_WS, AfterWs, WsState} = whitespace(AfterName, NameState, []),
                    parse_entity_def(AfterWs, WsState, EntityName)
            end
    end;
parse_entity_decl_1(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_entity_decl_1/2],
                             undefined).




%% Parses the name of a parameter entity declaration and the mandatory
%% whitespace after it, then hands over to parse_pe_def/3 for the
%% definition. A fatal error is raised when the first character cannot
%% start a name or when the name is not followed by whitespace.
parse_pe_name(?STRING_UNBOUND_REST(First, Tail), State) ->
    case is_name_start(First) of
        false ->
            ?fatal_error(State, "name expected");
        true ->
            {PEName, AfterName, NameState} = parse_name(Tail, State, [First]),
            case is_next_char_whitespace(AfterName, NameState) of
                false ->
                    ?fatal_error(NameState, "whitespace expected");
                true ->
                    {_WS, AfterWs, WsState} = whitespace(AfterName, NameState, []),
                    parse_pe_def(AfterWs, WsState, PEName)
            end
    end;
parse_pe_name(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_pe_name/2],
                             undefined).



%%----------------------------------------------------------------------
%% Function  : parse_entity_def(Rest, State, Name) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             Name = string()
%% Result    : {Rest, State}
%% Description: Parses the definition of the general entity Name. The
%%              entity is stored in the reference table of the state
%%              and one of the events internalEntityDecl,
%%              externalEntityDecl or unparsedEntityDecl is emitted.
%%              [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
%%----------------------------------------------------------------------
parse_entity_def(?STRING_EMPTY, State, Name) ->
    cf(?STRING_EMPTY, State, Name, fun parse_entity_def/3);
parse_entity_def(?STRING_UNBOUND_REST(Quote, Tail), State, Name)
  when Quote == $'; Quote == $" ->
    %% Internal entity: a quoted entity value followed by S? '>'.
    {Value, AfterValue, ValueState} = parse_entity_value(Tail, State, Quote, []),
    RefTable = ValueState#xmerl_sax_parser_state.ref_table,
    insert_reference({Name, internal_general, Value}, RefTable),
    EventState = event_callback({internalEntityDecl, Name, Value}, ValueState),
    {_WS, AfterWs, WsState} = whitespace(AfterValue, EventState, []),
    parse_def_end(AfterWs, WsState);
parse_entity_def(?STRING_UNBOUND_REST(Char, _) = Input, State, Name)
  when Char == $S; Char == $P ->
    %% External entity: SYSTEM/PUBLIC id with an optional NDATA part.
    {PubId, SysId, AfterId, IdState} = parse_external_id(Input, State, false),
    {Ndata, AfterNdata, NdataState} = parse_ndata(AfterId, IdState),
    {RefEntry, Event} =
        case Ndata of
            undefined ->
                {{Name, external_general, {PubId, SysId}},
                 {externalEntityDecl, Name, PubId, SysId}};
            _ ->
                {{Name, unparsed, {PubId, SysId, Ndata}},
                 {unparsedEntityDecl, Name, PubId, SysId, Ndata}}
        end,
    insert_reference(RefEntry, NdataState#xmerl_sax_parser_state.ref_table),
    {AfterNdata, event_callback(Event, NdataState)};
parse_entity_def(Input, State, Name) ->
    unicode_incomplete_check([Input, State, Name, fun parse_entity_def/3],
                             "\", \', SYSTEM or PUBLIC expected").


%% Consumes the '>' that closes a definition and returns {Rest, State}.
%% Any other input is handed to unicode_incomplete_check/2 together
%% with the "> expected" message.
parse_def_end(?STRING_REST(">", Tail), State) ->
    {Tail, State};
parse_def_end(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_def_end/2],
                             "> expected").



%%----------------------------------------------------------------------
%% Function  : parse_ndata(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Ndata, Rest, State}
%% Description: Parses the optional NDATA part at the end of an
%%              external entity declaration. When the declaration is
%%              closed by '>' straight away, the '>' is consumed and
%%              Ndata is undefined. After whitespace, parsing goes on
%%              in parse_ndata_decl/2.
%%              [76] NDataDecl ::= S 'NDATA' S Name
%%----------------------------------------------------------------------
parse_ndata(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_ndata/2);
parse_ndata(?STRING_REST(">", Tail), State) ->
    {undefined, Tail, State};
parse_ndata(?STRING_UNBOUND_REST(Char, _) = Input, State)
  when ?is_whitespace(Char) ->
    {_WS, AfterWs, WsState} = whitespace(Input, State, []),
    parse_ndata_decl(AfterWs, WsState);
parse_ndata(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_ndata/2],
                             "Space before NDATA or > expected").

%%----------------------------------------------------------------------
%% Function  : parse_entity_value(Rest, State, Stop, Acc) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             Stop = $' | $" | undefined
%%             Acc = string()
%% Result    : {Value, Rest, State}
%%             Value = string()
%% Description: Parse an entity value up to the quote character Stop.
%%              Line breaks and tabs are stored as one space each,
%%              character references are replaced by their value and
%%              general entity references are kept as "&Name;" text.
%%              With Stop = undefined the parsing ends when the input
%%              is exhausted and Acc is returned as it is (not reversed).
%%----------------------------------------------------------------------
parse_entity_value(?STRING_EMPTY, State, undefined, Acc) ->
    {Acc, [], State}; %% stop clause when parsing references
parse_entity_value(?STRING_EMPTY, State, Stop, Acc) ->
    cf(?STRING_EMPTY, State, Stop, Acc, fun parse_entity_value/4);
parse_entity_value(?STRING("\r"), State, Stop, Acc) ->
    %% A lone "\r" as the very last input may be the first half of
    %% "\r\n", so more input is requested before deciding.
    cf(?STRING("\r"), State, Stop, Acc, fun parse_entity_value/4);
parse_entity_value(?STRING_REST("\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Stop, Acc) ->
    parse_entity_value(Rest,
		   State#xmerl_sax_parser_state{line_no=N+1}, Stop, [?space |Acc]);
parse_entity_value(?STRING_REST("\r\n", Rest), #xmerl_sax_parser_state{line_no=N} = State, Stop, Acc) ->
    parse_entity_value(Rest,
		   State#xmerl_sax_parser_state{line_no=N+1}, Stop, [?space |Acc]);
parse_entity_value(?STRING_REST("\r", Rest), #xmerl_sax_parser_state{line_no=N} = State, Stop, Acc)  ->
    parse_entity_value(Rest,
		   State#xmerl_sax_parser_state{line_no=N+1}, Stop, [?space |Acc]);
parse_entity_value(?STRING_REST("\t", Rest), State, Stop, Acc)  ->
    %% A tab is stored as a space like the line breaks above, but it
    %% does not end a line, so line_no must stay untouched.
    parse_entity_value(Rest, State, Stop, [?space |Acc]);
parse_entity_value(?STRING_REST("&", Rest), State, Stop, Acc)  ->
    {Ref, Rest1, State1} = parse_reference(Rest, State, false),
    case Ref of
	{character, _, CharValue}  ->
	    parse_entity_value(Rest1, State1, Stop, [CharValue | Acc]);
	%% General entity references are not expanded here; the reference
	%% text is pushed back (reversed, as Acc is built back to front).
	{internal_general, _, Name, _} ->
	    parse_entity_value(Rest1, State1, Stop, ";" ++ lists:reverse(Name) ++ "&" ++ Acc);
	{external_general, Name, _} ->
	    parse_entity_value(Rest1, State1, Stop, ";" ++ lists:reverse(Name) ++ "&" ++ Acc);
	{not_found, Name} ->
	    parse_entity_value(Rest1, State1, Stop, ";" ++ lists:reverse(Name) ++ "&" ++ Acc);
	{unparsed, Name, _} ->
	    ?fatal_error(State1, "Unparsed entity reference in entity value: " ++ Name)
    end;
parse_entity_value(?STRING_REST("%", Rest), #xmerl_sax_parser_state{file_type=Type} = State, Stop, Acc) ->
    {Ref, Rest1, State1} = parse_pe_reference(Rest, State),
    case Type of
	normal -> %WFC: PEs in Internal Subset
	    %% Ref is either a three-tuple carrying the name in its second
	    %% element or {not_found, Name}; both must end in the fatal
	    %% error below instead of a badmatch.
	    Name = case Ref of
		       {not_found, UnknownName} -> UnknownName;
		       {_, KnownName, _} -> KnownName
		   end,
	    ?fatal_error(State1, "A parameter reference may not occur within "
			 "markup declarations in the internal DTD subset: " ++ Name);
	_ ->
	    case Ref of
		{internal_parameter, _, RefValue} ->
		    %% The replacement text is padded with one space on each
		    %% side and put in front of the remaining input.
		    IValue = ?TO_INPUT_FORMAT(" " ++ RefValue ++ " "),
		    parse_entity_value(?APPEND_STRING(IValue, Rest1), State1, Stop, Acc);
		{external_parameter, _, {_PubId, _SysId}} ->
		    ?fatal_error(State1, "Parameter references in entity value not supported yet.");
		{not_found, Name} ->
		    case State#xmerl_sax_parser_state.skip_external_dtd of
			false ->
			    ?fatal_error(State1, "Entity not declared: " ++ Name); %%VC: Entity Declared
			true ->
			    parse_entity_value(Rest1, State1, Stop, ";" ++ lists:reverse(Name) ++ "&" ++ Acc)
		    end

	    end
    end;
parse_entity_value(?STRING_UNBOUND_REST(Stop, Rest), State, Stop, Acc) ->
    %% The closing quote: Acc was collected back to front.
    {lists:reverse(Acc), Rest, State};
parse_entity_value(?STRING_UNBOUND_REST(C, Rest), State, Stop, Acc)   ->
    if
	?is_char(C) ->
	    parse_entity_value(Rest, State, Stop, [C|Acc]);
	true ->
	     ?fatal_error(State, lists:flatten(io_lib:format("Bad character in entity value: ~p", [C])))
    end;
parse_entity_value(Bytes, State, Stop, Acc)   ->
    unicode_incomplete_check([Bytes, State, Stop, Acc, fun parse_entity_value/4],
			     undefined).

%%----------------------------------------------------------------------
%% Function  : parse_ndata_decl(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {undefined, Rest, State} when ">" is found instead of
%%             the keyword, otherwise the result of parse_ndata_decl_1/2
%% Description: Match the 'NDATA' keyword of an NDATA declaration.
%%              [76] NDataDecl ::= S 'NDATA' S Name
%%----------------------------------------------------------------------
parse_ndata_decl(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_ndata_decl/2);
parse_ndata_decl(?STRING_REST(">", Tail), State) ->
    {undefined, Tail, State};
%% The input ends in the middle of the keyword: hand the partial
%% keyword to cf/3 together with this function.
parse_ndata_decl(?STRING("N") = Partial, State) ->
    cf(Partial, State, fun parse_ndata_decl/2);
parse_ndata_decl(?STRING("ND") = Partial, State) ->
    cf(Partial, State, fun parse_ndata_decl/2);
parse_ndata_decl(?STRING("NDA") = Partial, State) ->
    cf(Partial, State, fun parse_ndata_decl/2);
parse_ndata_decl(?STRING("NDAT") = Partial, State) ->
    cf(Partial, State, fun parse_ndata_decl/2);
parse_ndata_decl(?STRING_REST("NDATA", Tail), State) ->
    parse_ndata_decl_1(Tail, State);
parse_ndata_decl(Input, State) ->
    ErrMsg = "NDATA or > expected",
    unicode_incomplete_check([Input, State, fun parse_ndata_decl/2], ErrMsg).


%%----------------------------------------------------------------------
%% Function  : parse_ndata_decl_1(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : the result of parse_ndecl_name/2
%% Description: Parse what follows the 'NDATA' keyword: mandatory
%%              whitespace, which is skipped, and then the name.
%%----------------------------------------------------------------------
parse_ndata_decl_1(?STRING_EMPTY, State) ->
    %% This function is entered directly after the keyword, so the
    %% input may end right here; treat it like the sibling functions do.
    cf(?STRING_EMPTY, State, fun parse_ndata_decl_1/2);
parse_ndata_decl_1(?STRING_UNBOUND_REST(C, _) = Bytes, State) when ?is_whitespace(C) ->
    {_WS, Rest, State1} = whitespace(Bytes, State, []),
    parse_ndecl_name(Rest, State1);
parse_ndata_decl_1(Bytes, State) ->
    %% The function handed over must be this one: the keyword has
    %% already been consumed, so parse_ndata_decl/2 would look for
    %% 'NDATA' a second time.
    unicode_incomplete_check([Bytes, State, fun parse_ndata_decl_1/2],
			     "whitespace expected").


%%----------------------------------------------------------------------
%% Function  : parse_ndecl_name(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Name, Rest, State}
%% Description: Parse the name of an NDATA declaration, skip trailing
%%              whitespace and let parse_def_end/2 handle the end of
%%              the definition.
%%----------------------------------------------------------------------
parse_ndecl_name(?STRING_UNBOUND_REST(First, Tail), State) ->
    case is_name_start(First) of
        false ->
            ?fatal_error(State, "name expected");
        true ->
            {Name, Tail1, State1} = parse_name(Tail, State, [First]),
            {_Skipped, Tail2, State2} = whitespace(Tail1, State1, []),
            {Tail3, State3} = parse_def_end(Tail2, State2),
            {Name, Tail3, State3}
    end;
parse_ndecl_name(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_ndecl_name/2], undefined).

%%----------------------------------------------------------------------
%% Function  : parse_pe_def(Rest, State, Name) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             Name = string()
%% Result    : the result of parse_def_end/2
%% Description: Parse a parameter entity definition. The entity is
%%              stored in the reference table under "%" ++ Name and
%%              reported through event_callback/2, as an
%%              internalEntityDecl for a quoted value and as an
%%              externalEntityDecl for an external id.
%%              [74] PEDef ::= EntityValue | ExternalID
%%----------------------------------------------------------------------
parse_pe_def(?STRING_EMPTY, State, Name) ->
    cf(?STRING_EMPTY, State, Name, fun parse_pe_def/3);
parse_pe_def(?STRING_UNBOUND_REST(Quote, Tail), State, Name) when Quote == $'; Quote == $" ->
    %% The opening quote is also the character that ends the value.
    {Value, Tail1, State1} = parse_entity_value(Tail, State, Quote, []),
    PEName = "%" ++ Name,
    RefTable = State1#xmerl_sax_parser_state.ref_table,
    insert_reference({PEName, internal_parameter, Value}, RefTable),
    State2 = event_callback({internalEntityDecl, PEName, Value}, State1),
    {_Skipped, Tail2, State3} = whitespace(Tail1, State2, []),
    parse_def_end(Tail2, State3);
parse_pe_def(?STRING_UNBOUND_REST(Ch, _) = Input, State, Name) when Ch == $S; Ch == $P ->
    %% $S or $P: let parse_external_id/3 check for the actual keyword.
    {PubId, SysId, Tail1, State1} = parse_external_id(Input, State, false),
    PEName = "%" ++ Name,
    RefTable = State1#xmerl_sax_parser_state.ref_table,
    insert_reference({PEName, external_parameter, {PubId, SysId}}, RefTable),
    State2 = event_callback({externalEntityDecl, PEName, PubId, SysId}, State1),
    {_Skipped, Tail2, State3} = whitespace(Tail1, State2, []),
    parse_def_end(Tail2, State3);
parse_pe_def(Input, State, Name) ->
    ErrMsg = "\", \', SYSTEM or PUBLIC expected",
    unicode_incomplete_check([Input, State, Name, fun parse_pe_def/3], ErrMsg).


%%----------------------------------------------------------------------
%% Function  : parse_notation_decl(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Rest, State}
%% Description: Parse a NOTATION declaration. Only the mandatory
%%              whitespace in front of the name is handled here, the
%%              rest is done by parse_notation_decl_1/2.
%%              [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID |  PublicID) S? '>'
%%----------------------------------------------------------------------
parse_notation_decl(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_notation_decl/2);
parse_notation_decl(?STRING_UNBOUND_REST(Ch, _) = Input, State) when ?is_whitespace(Ch) ->
    {_Skipped, Tail, State1} = whitespace(Input, State, []),
    parse_notation_decl_1(Tail, State1);
parse_notation_decl(Input, State) ->
    ErrMsg = "whitespace expected",
    unicode_incomplete_check([Input, State, fun parse_notation_decl/2], ErrMsg).


%%----------------------------------------------------------------------
%% Function  : parse_notation_decl_1(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {Rest, State}
%% Description: Parse the name and the identity of a NOTATION
%%              declaration and report it as a notationDecl event.
%%----------------------------------------------------------------------
parse_notation_decl_1(?STRING_UNBOUND_REST(First, Tail), State) ->
    case is_name_start(First) of
        false ->
            ?fatal_error(State, "name expected");
        true ->
            {Name, Tail1, State1} = parse_name(Tail, State, [First]),
            {PubId, SysId, Tail2, State2} = parse_notation_id(Tail1, State1),
            {Tail2, event_callback({notationDecl, Name, PubId, SysId}, State2)}
    end;
parse_notation_decl_1(Input, State) ->
    unicode_incomplete_check([Input, State, fun parse_notation_decl_1/2], undefined).

%%----------------------------------------------------------------------
%% Function  : parse_notation_id(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {PubId, SysId, Rest, State}
%%             PubId = string()
%%             SysId = string()
%% Description: Parse a NOTATION identity. Whitespace is mandatory in
%%              front of the identity; it is skipped here and the
%%              identity itself is parsed by parse_notation_id_1/2.
%%              The public id case is a special variant of external id
%%              where just the public part is allowed.
%%              [83] PublicID ::= 'PUBLIC' S PubidLiteral
%%----------------------------------------------------------------------
parse_notation_id(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_notation_id/2);
parse_notation_id(?STRING_UNBOUND_REST(Ch, _) = Input, State) when ?is_whitespace(Ch) ->
    {_Skipped, Tail, State1} = whitespace(Input, State, []),
    parse_notation_id_1(Tail, State1);
parse_notation_id(Input, State) ->
    ErrMsg = "whitespace expected",
    unicode_incomplete_check([Input, State, fun parse_notation_id/2], ErrMsg).

%%----------------------------------------------------------------------
%% Function  : parse_notation_id_1(Rest, State) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%% Result    : {PubId, SysId, Rest, State}
%%             PubId = string()
%%             SysId = string()
%% Description: Parse a NOTATION identity. parse_external_id/3 is
%%              called with true as its third argument, then trailing
%%              whitespace is skipped and parse_def_end/2 handles the
%%              end of the declaration.
%%----------------------------------------------------------------------
parse_notation_id_1(?STRING_EMPTY, State) ->
    cf(?STRING_EMPTY, State, fun parse_notation_id_1/2);
parse_notation_id_1(?STRING_UNBOUND_REST(Ch, _) = Input, State) when Ch == $S; Ch == $P ->
    {PubId, SysId, Tail1, State1} = parse_external_id(Input, State, true),
    {_Skipped, Tail2, State2} = whitespace(Tail1, State1, []),
    {Tail3, State3} = parse_def_end(Tail2, State2),
    {PubId, SysId, Tail3, State3};
parse_notation_id_1(Input, State) ->
    ErrMsg = "external id or public id expected",
    unicode_incomplete_check([Input, State, fun parse_notation_id_1/2], ErrMsg).


%%======================================================================
%% Character checks and definitions
%%======================================================================

%%----------------------------------------------------------------------
%% Classification table for the character codes 1..255.
%% The tuple has 255 elements and is read with element(C, ?SMALL), so
%% element number C describes character code C. Code 0 and all codes
%% above 255 are outside the tuple; element/2 fails for them and the
%% functions using the table catch that and fall back on their range
%% based checks.
%% 0 - not classified, 
%% 1 - base_char or ideographic, 
%% 2 - combining_char or digit or extender,
%% 3 - $. or $- or $_ or $:
%%----------------------------------------------------------------------
-define(SMALL, {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,3,0,2,2,2,2,2,2,2,2,2,2,3,0,
                0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
                1,0,0,0,0,3,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
                1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                0,0,0,2,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
                1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
                1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1}).


%%----------------------------------------------------------------------
%% Function  : is_name_start(Char) -> Result
%% Parameters: Char = char()
%% Result    : true | false
%% Description: Check if the character may start a name: an underscore,
%%              a colon or anything is_letter/1 accepts.
%%              [5] Name ::= (Letter | '_' | ':') (NameChar)*
%%----------------------------------------------------------------------
is_name_start(C) when C =:= $_; C =:= $: ->
    true;
is_name_start(C) ->
    is_letter(C).
	    

%%----------------------------------------------------------------------
%% Function  : is_name_char(Char) -> Result
%% Parameters: Char = char()
%% Result    : true | false
%% Description: Check if character is a valid name character.
%%              [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' 
%%                               | CombiningChar | Extender
%%----------------------------------------------------------------------
is_name_char(C) ->
    %% Every classified entry (1, 2 or 3) of the ?SMALL table is a name
    %% character. When C has no entry in the table element/2 fails and
    %% the character classes are tried one after the other instead.
    try element(C, ?SMALL) of
        Class ->
            Class > 0
    catch
        _:_ ->
            is_letter(C)
                orelse is_digit(C)
                orelse is_combining_char(C)
                orelse is_extender(C)
    end.


%%----------------------------------------------------------------------
%% Function  : is_pubid_char(Char) -> Result
%% Parameters: Char = char()
%% Result    : true | false
%% Description: Check if character is a public identity character.
%%              [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] 
%%                                 | [-'()+,./:=?;!*#@$_%]
%%----------------------------------------------------------------------
is_pubid_char(?space) ->
    true;
is_pubid_char(?cr) ->
    true;
is_pubid_char(?lf) ->
    true;
is_pubid_char($!) ->
    true;
is_pubid_char($:) ->
    true;
is_pubid_char($;) ->
    true;
is_pubid_char($=) ->
    true;
is_pubid_char($?) ->
    %% Part of production [13] like the other punctuation characters.
    true;
is_pubid_char($@) ->
    true;
is_pubid_char($_) ->
    true;
is_pubid_char(C) when $# =< C, C =< $% ->
    %% $#, $$ and $%
    true;
is_pubid_char(C) when $' =< C, C =< $/ ->
    %% $', $(, $), $*, $+, $,, $-, $. and $/
    true;
is_pubid_char(C) ->
    %% NOTE(review): is_letter/1 and is_digit/1 also accept characters
    %% outside [a-zA-Z0-9], which is more than production [13] allows.
    %% Left as it is since narrowing it would reject input that is
    %% accepted today.
    case is_letter(C) of
	true ->
	    true;
	false ->
	    is_digit(C)
    end.


%%----------------------------------------------------------------------
%% Function  : is_letter(Char) -> Result
%% Parameters: Char = char()
%% Result    : true | false
%% Description: Check if character is a letter.
%%              [84] Letter ::= BaseChar | Ideographic
%%----------------------------------------------------------------------
is_letter(C) ->
    %% Class 1 in the ?SMALL table marks a letter. When C has no entry
    %% in the table element/2 fails and the range checks are used.
    try element(C, ?SMALL) of
        Class ->
            Class =:= 1
    catch
        _:_ ->
            is_base_char(C) orelse is_ideographic(C)
    end.


%%----------------------------------------------------------------------
%% Function  : is_base_char(Char) -> Result
%% Parameters: Char = char()
%% Result    : true | false
%% Description: Check if character is a basic character.
%%              [85] BaseChar 
%%              There is one clause per script; the alternatives of a
%%              guard are the ranges and single code points of that
%%              script in ascending order.
%%----------------------------------------------------------------------
%% ASCII Latin
is_base_char(C) when C >= 16#0041, C =< 16#005A;
                     C >= 16#0061, C =< 16#007A ->
    true;
%% ISO Latin
is_base_char(C) when C >= 16#00C0, C =< 16#00D6;
                     C >= 16#00D8, C =< 16#00F6;
                     C >= 16#00F8, C =< 16#00FF ->
    true;
%% Accented Latin
is_base_char(C) when C >= 16#0100, C =< 16#0131;
                     C >= 16#0134, C =< 16#013E;
                     C >= 16#0141, C =< 16#0148;
                     C >= 16#014A, C =< 16#017E;
                     C >= 16#0180, C =< 16#01C3;
                     C >= 16#01CD, C =< 16#01F0;
                     C >= 16#01F4, C =< 16#01F5;
                     C >= 16#01FA, C =< 16#0217 ->
    true;
%% IPA
is_base_char(C) when C >= 16#0250, C =< 16#02A8 ->
    true;
%% Spacing Modifiers
is_base_char(C) when C >= 16#02BB, C =< 16#02C1 ->
    true;
%% Greek
is_base_char(C) when C =:= 16#0386;
                     C >= 16#0388, C =< 16#038A;
                     C =:= 16#038C;
                     C >= 16#038E, C =< 16#03A1;
                     C >= 16#03A3, C =< 16#03CE;
                     C >= 16#03D0, C =< 16#03D6;
                     C =:= 16#03DA;
                     C =:= 16#03DC;
                     C =:= 16#03DE;
                     C =:= 16#03E0;
                     C >= 16#03E2, C =< 16#03F3 ->
    true;
%% Cyrillic
is_base_char(C) when C >= 16#0401, C =< 16#040C;
                     C >= 16#040E, C =< 16#044F;
                     C >= 16#0451, C =< 16#045C;
                     C >= 16#045E, C =< 16#0481;
                     C >= 16#0490, C =< 16#04C4;
                     C >= 16#04C7, C =< 16#04C8;
                     C >= 16#04CB, C =< 16#04CC;
                     C >= 16#04D0, C =< 16#04EB;
                     C >= 16#04EE, C =< 16#04F5;
                     C >= 16#04F8, C =< 16#04F9 ->
    true;
%% Armenian
is_base_char(C) when C >= 16#0531, C =< 16#0556;
                     C =:= 16#0559;
                     C >= 16#0561, C =< 16#0586 ->
    true;
%% Hebrew
is_base_char(C) when C >= 16#05D0, C =< 16#05EA;
                     C >= 16#05F0, C =< 16#05F2 ->
    true;
%% Arabic
is_base_char(C) when C >= 16#0621, C =< 16#063A;
                     C >= 16#0641, C =< 16#064A;
                     C >= 16#0671, C =< 16#06B7;
                     C >= 16#06BA, C =< 16#06BE;
                     C >= 16#06C0, C =< 16#06CE;
                     C >= 16#06D0, C =< 16#06D3;
                     C =:= 16#06D5;
                     C >= 16#06E5, C =< 16#06E6 ->
    true;
%% Devanagari
is_base_char(C) when C >= 16#0905, C =< 16#0939;
                     C =:= 16#093D;
                     C >= 16#0958, C =< 16#0961 ->
    true;
%% Bengali
is_base_char(C) when C >= 16#0985, C =< 16#098C;
                     C >= 16#098F, C =< 16#0990;
                     C >= 16#0993, C =< 16#09A8;
                     C >= 16#09AA, C =< 16#09B0;
                     C =:= 16#09B2;
                     C >= 16#09B6, C =< 16#09B9;
                     C >= 16#09DC, C =< 16#09DD;
                     C >= 16#09DF, C =< 16#09E1;
                     C >= 16#09F0, C =< 16#09F1 ->
    true;
%% Gurmukhi
is_base_char(C) when C >= 16#0A05, C =< 16#0A0A;
                     C >= 16#0A0F, C =< 16#0A10;
                     C >= 16#0A13, C =< 16#0A28;
                     C >= 16#0A2A, C =< 16#0A30;
                     C >= 16#0A32, C =< 16#0A33;
                     C >= 16#0A35, C =< 16#0A36;
                     C >= 16#0A38, C =< 16#0A39;
                     C >= 16#0A59, C =< 16#0A5C;
                     C =:= 16#0A5E;
                     C >= 16#0A72, C =< 16#0A74 ->
    true;
%% Gujarati
is_base_char(C) when C >= 16#0A85, C =< 16#0A8B;
                     C =:= 16#0A8D;
                     C >= 16#0A8F, C =< 16#0A91;
                     C >= 16#0A93, C =< 16#0AA8;
                     C >= 16#0AAA, C =< 16#0AB0;
                     C >= 16#0AB2, C =< 16#0AB3;
                     C >= 16#0AB5, C =< 16#0AB9;
                     C =:= 16#0ABD;
                     C =:= 16#0AE0 ->
    true;
%% Oriya
is_base_char(C) when C >= 16#0B05, C =< 16#0B0C;
                     C >= 16#0B0F, C =< 16#0B10;
                     C >= 16#0B13, C =< 16#0B28;
                     C >= 16#0B2A, C =< 16#0B30;
                     C >= 16#0B32, C =< 16#0B33;
                     C >= 16#0B36, C =< 16#0B39;
                     C =:= 16#0B3D;
                     C >= 16#0B5C, C =< 16#0B5D;
                     C >= 16#0B5F, C =< 16#0B61 ->
    true;
%% Tamil
is_base_char(C) when C >= 16#0B85, C =< 16#0B8A;
                     C >= 16#0B8E, C =< 16#0B90;
                     C >= 16#0B92, C =< 16#0B95;
                     C >= 16#0B99, C =< 16#0B9A;
                     C =:= 16#0B9C;
                     C >= 16#0B9E, C =< 16#0B9F;
                     C >= 16#0BA3, C =< 16#0BA4;
                     C >= 16#0BA8, C =< 16#0BAA;
                     C >= 16#0BAE, C =< 16#0BB5;
                     C >= 16#0BB7, C =< 16#0BB9 ->
    true;
%% Telugu
is_base_char(C) when C >= 16#0C05, C =< 16#0C0C;
                     C >= 16#0C0E, C =< 16#0C10;
                     C >= 16#0C12, C =< 16#0C28;
                     C >= 16#0C2A, C =< 16#0C33;
                     C >= 16#0C35, C =< 16#0C39;
                     C >= 16#0C60, C =< 16#0C61 ->
    true;
%% Kannada
is_base_char(C) when C >= 16#0C85, C =< 16#0C8C;
                     C >= 16#0C8E, C =< 16#0C90;
                     C >= 16#0C92, C =< 16#0CA8;
                     C >= 16#0CAA, C =< 16#0CB3;
                     C >= 16#0CB5, C =< 16#0CB9;
                     C =:= 16#0CDE;
                     C >= 16#0CE0, C =< 16#0CE1 ->
    true;
%% Malayalam
is_base_char(C) when C >= 16#0D05, C =< 16#0D0C;
                     C >= 16#0D0E, C =< 16#0D10;
                     C >= 16#0D12, C =< 16#0D28;
                     C >= 16#0D2A, C =< 16#0D39;
                     C >= 16#0D60, C =< 16#0D61 ->
    true;
%% Thai
is_base_char(C) when C >= 16#0E01, C =< 16#0E2E;
                     C =:= 16#0E30;
                     C >= 16#0E32, C =< 16#0E33;
                     C >= 16#0E40, C =< 16#0E45 ->
    true;
%% Lao
is_base_char(C) when C >= 16#0E81, C =< 16#0E82;
                     C =:= 16#0E84;
                     C >= 16#0E87, C =< 16#0E88;
                     C =:= 16#0E8A;
                     C =:= 16#0E8D;
                     C >= 16#0E94, C =< 16#0E97;
                     C >= 16#0E99, C =< 16#0E9F;
                     C >= 16#0EA1, C =< 16#0EA3;
                     C =:= 16#0EA5;
                     C =:= 16#0EA7;
                     C >= 16#0EAA, C =< 16#0EAB;
                     C >= 16#0EAD, C =< 16#0EAE;
                     C =:= 16#0EB0;
                     C >= 16#0EB2, C =< 16#0EB3;
                     C =:= 16#0EBD;
                     C >= 16#0EC0, C =< 16#0EC4 ->
    true;
%% Tibetan
is_base_char(C) when C >= 16#0F40, C =< 16#0F47;
                     C >= 16#0F49, C =< 16#0F69 ->
    true;
%% Georgian
is_base_char(C) when C >= 16#10A0, C =< 16#10C5;
                     C >= 16#10D0, C =< 16#10F6 ->
    true;
%% Hangul Jamo
is_base_char(C) when C =:= 16#1100;
                     C >= 16#1102, C =< 16#1103;
                     C >= 16#1105, C =< 16#1107;
                     C =:= 16#1109;
                     C >= 16#110B, C =< 16#110C;
                     C >= 16#110E, C =< 16#1112;
                     C =:= 16#113C;
                     C =:= 16#113E;
                     C =:= 16#1140;
                     C =:= 16#114C;
                     C =:= 16#114E;
                     C =:= 16#1150;
                     C >= 16#1154, C =< 16#1155;
                     C =:= 16#1159;
                     C >= 16#115F, C =< 16#1161;
                     C =:= 16#1163;
                     C =:= 16#1165;
                     C =:= 16#1167;
                     C =:= 16#1169;
                     C >= 16#116D, C =< 16#116E;
                     C >= 16#1172, C =< 16#1173;
                     C =:= 16#1175;
                     C =:= 16#119E;
                     C =:= 16#11A8;
                     C =:= 16#11AB;
                     C >= 16#11AE, C =< 16#11AF;
                     C >= 16#11B7, C =< 16#11B8;
                     C =:= 16#11BA;
                     C >= 16#11BC, C =< 16#11C2;
                     C =:= 16#11EB;
                     C =:= 16#11F0;
                     C =:= 16#11F9 ->
    true;
%% Latin Extended Additional
is_base_char(C) when C >= 16#1E00, C =< 16#1E9B;
                     C >= 16#1EA0, C =< 16#1EF9 ->
    true;
%% Greek Extended
is_base_char(C) when C >= 16#1F00, C =< 16#1F15;
                     C >= 16#1F18, C =< 16#1F1D;
                     C >= 16#1F20, C =< 16#1F45;
                     C >= 16#1F48, C =< 16#1F4D;
                     C >= 16#1F50, C =< 16#1F57;
                     C =:= 16#1F59;
                     C =:= 16#1F5B;
                     C =:= 16#1F5D;
                     C >= 16#1F5F, C =< 16#1F7D;
                     C >= 16#1F80, C =< 16#1FB4;
                     C >= 16#1FB6, C =< 16#1FBC;
                     C =:= 16#1FBE;
                     C >= 16#1FC2, C =< 16#1FC4;
                     C >= 16#1FC6, C =< 16#1FCC;
                     C >= 16#1FD0, C =< 16#1FD3;
                     C >= 16#1FD6, C =< 16#1FDB;
                     C >= 16#1FE0, C =< 16#1FEC;
                     C >= 16#1FF2, C =< 16#1FF4;
                     C >= 16#1FF6, C =< 16#1FFC ->
    true;
%% Letterlike Symbols
is_base_char(C) when C =:= 16#2126;
                     C >= 16#212A, C =< 16#212B;
                     C =:= 16#212E ->
    true;
%% Number Forms
is_base_char(C) when C >= 16#2180, C =< 16#2182 ->
    true;
%% Hiragana
is_base_char(C) when C >= 16#3041, C =< 16#3094 ->
    true;
%% Katakana
is_base_char(C) when C >= 16#30A1, C =< 16#30FA ->
    true;
%% Bopomofo
is_base_char(C) when C >= 16#3105, C =< 16#312C ->
    true;
%% Hangul Syllables
is_base_char(C) when C >= 16#ac00, C =< 16#d7a3 ->
    true;
is_base_char(_) ->
    false.

%%----------------------------------------------------------------------
%% Function  : is_ideographic(Char) -> Result
%% Parameters: Char = char()
%% Result    : true | false
%% Description: Check if character is an ideographic letter.
%%              [86] Ideographic 	
%%----------------------------------------------------------------------
is_ideographic(C) when C >= 16#4e00, C =< 16#9fa5;  %% Unified CJK Ideographs
                       C =:= 16#3007;               %% CJK Symbols and Punctuation
                       C >= 16#3021, C =< 16#3029 ->
    true;
is_ideographic(_) ->
    false.

%%----------------------------------------------------------------------
%% Function  : is_combining_char(Char) -> Result
%% Parameters: Char = char()
%% Result    : true | false
%% Description: Check if character is a combining character.
%%              [87] CombiningChar
%%              There is one clause per script; the alternatives of a
%%              guard are the ranges and single code points of that
%%              script in ascending order.
%%----------------------------------------------------------------------
%% Combining Diacritics
is_combining_char(C) when C >= 16#0300, C =< 16#0345;
                          C >= 16#0360, C =< 16#0361 ->
    true;
%% Cyrillic Combining Diacritics
is_combining_char(C) when C >= 16#0483, C =< 16#0486 ->
    true;
%% Hebrew Combining Diacritics
is_combining_char(C) when C >= 16#0591, C =< 16#05a1;
                          C >= 16#05a3, C =< 16#05b9;
                          C >= 16#05bb, C =< 16#05bd;
                          C =:= 16#05bf;
                          C >= 16#05c1, C =< 16#05c2;
                          C =:= 16#05c4 ->
    true;
%% Arabic Combining Diacritics
is_combining_char(C) when C >= 16#064b, C =< 16#0652;
                          C =:= 16#0670;
                          C >= 16#06d6, C =< 16#06dc;
                          C >= 16#06dd, C =< 16#06df;
                          C >= 16#06e0, C =< 16#06e4;
                          C >= 16#06e7, C =< 16#06e8;
                          C >= 16#06ea, C =< 16#06ed ->
    true;
%% Devanagari Combining Diacritics
is_combining_char(C) when C >= 16#0901, C =< 16#0903;
                          C =:= 16#093c;
                          C >= 16#093e, C =< 16#094c;
                          C =:= 16#094d;
                          C >= 16#0951, C =< 16#0954;
                          C >= 16#0962, C =< 16#0963 ->
    true;
%% Bengali Combining Diacritics
is_combining_char(C) when C >= 16#0981, C =< 16#0983;
                          C =:= 16#09bc;
                          C =:= 16#09be;
                          C =:= 16#09bf;
                          C >= 16#09c0, C =< 16#09c4;
                          C >= 16#09c7, C =< 16#09c8;
                          C >= 16#09cb, C =< 16#09cd;
                          C =:= 16#09d7;
                          C >= 16#09e2, C =< 16#09e3 ->
    true;
%% Gurmukhi Combining Diacritics
is_combining_char(C) when C =:= 16#0a02;
                          C =:= 16#0a3c;
                          C =:= 16#0a3e;
                          C =:= 16#0a3f;
                          C >= 16#0a40, C =< 16#0a42;
                          C >= 16#0a47, C =< 16#0a48;
                          C >= 16#0a4b, C =< 16#0a4d;
                          C >= 16#0a70, C =< 16#0a71 ->
    true;
%% Gujarati Combining Diacritics
is_combining_char(C) when C >= 16#0a81, C =< 16#0a83;
                          C =:= 16#0abc;
                          C >= 16#0abe, C =< 16#0ac5;
                          C >= 16#0ac7, C =< 16#0ac9;
                          C >= 16#0acb, C =< 16#0acd ->
    true;
%% Oriya Combining Diacritics
is_combining_char(C) when C >= 16#0b01, C =< 16#0b03;
                          C =:= 16#0b3c;
                          C >= 16#0b3e, C =< 16#0b43;
                          C >= 16#0b47, C =< 16#0b48;
                          C >= 16#0b4b, C =< 16#0b4d;
                          C >= 16#0b56, C =< 16#0b57 ->
    true;
%% Tamil Combining Diacritics
is_combining_char(C) when C >= 16#0b82, C =< 16#0b83;
                          C >= 16#0bbe, C =< 16#0bc2;
                          C >= 16#0bc6, C =< 16#0bc8;
                          C >= 16#0bca, C =< 16#0bcd;
                          C =:= 16#0bd7 ->
    true;
%% Telugu Combining Diacritics
is_combining_char(C) when C >= 16#0c01, C =< 16#0c03;
                          C >= 16#0c3e, C =< 16#0c44;
                          C >= 16#0c46, C =< 16#0c48;
                          C >= 16#0c4a, C =< 16#0c4d;
                          C >= 16#0c55, C =< 16#0c56 ->
    true;
%% Kannada Combining Diacritics
is_combining_char(C) when C >= 16#0c82, C =< 16#0c83;
                          C >= 16#0cbe, C =< 16#0cc4;
                          C >= 16#0cc6, C =< 16#0cc8;
                          C >= 16#0cca, C =< 16#0ccd;
                          C >= 16#0cd5, C =< 16#0cd6 ->
    true;
%% Malayalam Combining Diacritics
is_combining_char(C) when C >= 16#0d02, C =< 16#0d03;
                          C >= 16#0d3e, C =< 16#0d43;
                          C >= 16#0d46, C =< 16#0d48;
                          C >= 16#0d4a, C =< 16#0d4d;
                          C =:= 16#0d57 ->
    true;
%% Thai Combining Diacritics
is_combining_char(C) when C =:= 16#0e31;
                          C >= 16#0e34, C =< 16#0e3a;
                          C >= 16#0e47, C =< 16#0e4e ->
    true;
%% Lao Combining Diacritics
is_combining_char(C) when C =:= 16#0eb1;
                          C >= 16#0eb4, C =< 16#0eb9;
                          C >= 16#0ebb, C =< 16#0ebc;
                          C >= 16#0ec8, C =< 16#0ecd ->
    true;
%% Tibetan Combining Diacritics
is_combining_char(C) when C >= 16#0f18, C =< 16#0f19;
                          C =:= 16#0f35;
                          C =:= 16#0f37;
                          C =:= 16#0f39;
                          C =:= 16#0f3e;
                          C =:= 16#0f3f;
                          C >= 16#0f71, C =< 16#0f84;
                          C >= 16#0f86, C =< 16#0f8b;
                          C >= 16#0f90, C =< 16#0f95;
                          C =:= 16#0f97;
                          C >= 16#0f99, C =< 16#0fad;
                          C >= 16#0fb1, C =< 16#0fb7;
                          C =:= 16#0fb9 ->
    true;
%% Math/Technical Combining Diacritics
is_combining_char(C) when C >= 16#20d0, C =< 16#20dc;
                          C =:= 16#20e1 ->
    true;
%% Ideographic Diacritics
is_combining_char(C) when C >= 16#302a, C =< 16#302f ->
    true;
%% Hiragana/Katakana Combining Diacritics
is_combining_char(C) when C =:= 16#3099;
                          C =:= 16#309a ->
    true;
is_combining_char(_) ->
    false.


%%----------------------------------------------------------------------
%% Function  : is_digit(Char) -> Result
%% Parameters: Char = char()
%% Result    : true | false
%% Description: Check if character is a digit.
%%              [88] Digit
%%----------------------------------------------------------------------
%% All digit ranges of production [88] are tested in one clause; each line
%% below is an alternative guard sequence (Low =< C =< High).
is_digit(C) when C >= 16#0030, C =< 16#0039;    %% ASCII 0-9
                 C >= 16#0660, C =< 16#0669;    %% Arabic-Indic
                 C >= 16#06F0, C =< 16#06F9;    %% Eastern Arabic-Indic
                 C >= 16#0966, C =< 16#096f;    %% Devanagari
                 C >= 16#09e6, C =< 16#09ef;    %% Bengali
                 C >= 16#0a66, C =< 16#0a6f;    %% Gurmukhi
                 C >= 16#0ae6, C =< 16#0aef;    %% Gujarati
                 C >= 16#0b66, C =< 16#0b6f;    %% Oriya
                 C >= 16#0be7, C =< 16#0bef;    %% Tamil (nine code points)
                 C >= 16#0c66, C =< 16#0c6f;    %% Telugu
                 C >= 16#0ce6, C =< 16#0cef;    %% Kannada
                 C >= 16#0d66, C =< 16#0d6f;    %% Malayalam
                 C >= 16#0e50, C =< 16#0e59;    %% Thai
                 C >= 16#0ed0, C =< 16#0ed9;    %% Lao
                 C >= 16#0f20, C =< 16#0f29 ->  %% Tibetan
    true;
is_digit(_) ->
    false.


%%----------------------------------------------------------------------
%% Function  : is_extender(Char) -> Result
%% Parameters: Char = char()
%% Result    : true | false
%% Description: Check if character is an extender character.
%%              [89] Extender
%%----------------------------------------------------------------------
%% Single code points are compared exactly; the three ranges at the end are
%% inclusive. Each line is an alternative guard sequence.
is_extender(C) when C =:= 16#00b7;                %% Middle Dot
                    C =:= 16#02d0;                %% Triangular Colon
                    C =:= 16#02d1;                %% Half Triangular Colon
                    C =:= 16#0387;                %% Greek Ano Teleia
                    C =:= 16#0640;                %% Arabic Tatweel
                    C =:= 16#0e46;                %% Thai Maiyamok
                    C =:= 16#0ec6;                %% Lao Ko La
                    C =:= 16#3005;                %% Ideographic Iteration Mark
                    C >= 16#3031, C =< 16#3035;   %% Japanese Kana Repetition Marks
                    C >= 16#309d, C =< 16#309e;   %% Japanese Hiragana Iteration Marks
                    C >= 16#30fc, C =< 16#30fe -> %% Japanese Kana Iteration Marks
    true;
is_extender(_) ->
    false.



%%======================================================================
%% Callback and Continuation function handling
%%======================================================================
%%----------------------------------------------------------------------
%% Function  : event_callback(Event, State) -> Result
%% Parameters: Event = term()
%%             State = #xmerl_sax_parser_state{}
%% Result    : #xmerl_sax_parser_state{}
%% Description: Function that uses provided fun to send parser events.
%%----------------------------------------------------------------------
event_callback(Event,
               #xmerl_sax_parser_state{event_fun = CbFun,
                                       event_state = EventState,
                                       line_no = N,
                                       entity = E,
                                       current_location = L} = State) ->
    %% The user fun receives the event, a {Location, Entity, LineNo} tuple and
    %% its own state; whatever it returns becomes the new event state.
    try CbFun(Event, {L, E, N}, EventState) of
        NewEventState ->
            State#xmerl_sax_parser_state{event_state = NewEventState}
    catch
        %% Throws and exits from the user fun are rethrown tagged with the
        %% parser state; exceptions of class error are not caught here.
        throw:ErrorTerm ->
            throw({event_receiver_error, State, ErrorTerm});
        exit:Reason ->
            throw({event_receiver_error, State, {'EXIT', Reason}})
    end.

%%----------------------------------------------------------------------
%% Function  : cf(Rest, State, NextCall) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             NextCall = fun()
%% Result    : {Rest, State}
%% Description: Function that uses provided fun to read another chunk from
%%              input stream and calls the fun in NextCall.
%%----------------------------------------------------------------------
cf(_Rest, #xmerl_sax_parser_state{continuation_fun = undefined} = State, _) ->
    ?fatal_error(State, "Continuation function undefined");
cf(Rest, #xmerl_sax_parser_state{continuation_fun = CFun, continuation_state = CState} = State,
   NextCall) ->
    %% Only the call to the user supplied continuation fun is protected;
    %% throws and exits from it are handed to ?fatal_error.
    Fetched =
        try
            CFun(CState)
        catch
            throw:Thrown ->
                ?fatal_error(State, Thrown);
            exit:Why ->
                ?fatal_error(State, {'EXIT', Why})
        end,
    case Fetched of
        {?STRING_EMPTY, _} ->
            %% The continuation fun delivered no new data.
            ?fatal_error(State, "No more bytes");
        {NewBytes, NewContState} ->
            %% Resume parsing with the old remainder followed by the new chunk.
            NewState = State#xmerl_sax_parser_state{continuation_state = NewContState},
            NextCall(?APPEND_STRING(Rest, NewBytes), NewState)
    end.

%%----------------------------------------------------------------------
%% Function  : cf(Rest, State, P, NextCall) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             NextCall = fun()
%%             P = term()
%% Result    : {Rest, State}
%% Description: Function that uses provided fun to read another chunk from
%%              input stream and calls the fun in NextCall with P as last parameter.
%%----------------------------------------------------------------------
cf(_Rest, #xmerl_sax_parser_state{continuation_fun = undefined} = State, _P, _) ->
    ?fatal_error(State, "Continuation function undefined");
cf(Rest, #xmerl_sax_parser_state{continuation_fun = CFun, continuation_state = CState} = State,
   P, NextCall) ->
    %% Only the call to the user supplied continuation fun is protected;
    %% throws and exits from it are handed to ?fatal_error.
    Fetched =
        try
            CFun(CState)
        catch
            throw:Thrown ->
                ?fatal_error(State, Thrown);
            exit:Why ->
                ?fatal_error(State, {'EXIT', Why})
        end,
    case Fetched of
        {?STRING_EMPTY, _} ->
            %% The continuation fun delivered no new data.
            ?fatal_error(State, "No more bytes");
        {NewBytes, NewContState} ->
            %% Resume parsing with the old remainder followed by the new
            %% chunk; P is passed through untouched as the last argument.
            NewState = State#xmerl_sax_parser_state{continuation_state = NewContState},
            NextCall(?APPEND_STRING(Rest, NewBytes), NewState, P)
    end.


%%----------------------------------------------------------------------
%% Function  : cf(Rest, State, P1, P2, NextCall) -> Result
%% Parameters: Rest = string() | binary()
%%             State = #xmerl_sax_parser_state{}
%%             NextCall = fun()
%%             P1 = term()
%%             P2 = term()
%% Result    : {Rest, State}
%% Description: Function that uses provided fun to read another chunk from
%%              input stream and calls the fun in NextCall with P1 and
%%              P2 as last parameters.
%%----------------------------------------------------------------------
cf(_Rest, #xmerl_sax_parser_state{continuation_fun = undefined} = State, _P1, _P2, _) ->
    ?fatal_error(State, "Continuation function undefined");
cf(Rest, #xmerl_sax_parser_state{continuation_fun = CFun, continuation_state = CState} = State,
   P1, P2, NextCall) ->
    %% Only the call to the user supplied continuation fun is protected;
    %% throws and exits from it are handed to ?fatal_error.
    Fetched =
        try
            CFun(CState)
        catch
            throw:Thrown ->
                ?fatal_error(State, Thrown);
            exit:Why ->
                ?fatal_error(State, {'EXIT', Why})
        end,
    case Fetched of
        {?STRING_EMPTY, _} ->
            %% The continuation fun delivered no new data.
            ?fatal_error(State, "No more bytes");
        {NewBytes, NewContState} ->
            %% Resume parsing with the old remainder followed by the new
            %% chunk; P1 and P2 are passed through as the last arguments.
            NewState = State#xmerl_sax_parser_state{continuation_state = NewContState},
            NextCall(?APPEND_STRING(Rest, NewBytes), NewState, P1, P2)
    end.



%%----------------------------------------------------------------------
%% Function  : unicode_incomplete_check(Args, ErrString) -> Result
%% Parameters: Args = [Bytes, State | RestOfArgs]
%%             Bytes = string() | binary()
%%             State = #xmerl_sax_parser_state{}
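%%             RestOfArgs = [term()]  (remaining arguments, forwarded to cf)
%%             ErrString = string() | undefined
%% Result    : {Rest, State}
%% Description: For binary input, tries to decode Bytes in the encoding of
%%              the parser state. If the data ends in an incomplete
%%              character, cf is applied to Args to read more input. If the
%%              data contains an invalid character, or decodes cleanly and
%%              ErrString is given, the error is reported with ?fatal_error.
%%              For list input ErrString is reported with ?fatal_error.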
%%----------------------------------------------------------------------
unicode_incomplete_check([Bytes, #xmerl_sax_parser_state{encoding = Enc} = State | _] = Args,
                         ErrString)
  when is_binary(Bytes) ->
    Decoded = unicode:characters_to_list(Bytes, Enc),
    case Decoded of
        {error, _Encoded, _Rest} ->
            %% The bytes are not valid in the encoding of the parser state.
            BadChar = io_lib:format("Bad character, not in ~p\n", [Enc]),
            ?fatal_error(State, lists:flatten(BadChar));
        {incomplete, _, _} ->
            %% The input stops in the middle of a character: hand the
            %% untouched argument list to the cf function of matching arity.
            apply(?MODULE, cf, Args);
        _ when ErrString =/= undefined ->
            %% The bytes decode cleanly, so the caller's error stands.
            ?fatal_error(State, ErrString)
    end;
unicode_incomplete_check([Bytes, State | _], ErrString)
  when is_list(Bytes), ErrString =/= undefined ->
    %% List input is not decoded here; the caller's error is reported as is.
    ?fatal_error(State, ErrString).


%%----------------------------------------------------------------------
%% Function  : check_uri(Uri, CL) -> Result
%% Parameters: Uri = string()
%%             CL = string()
%% Result    : {atom(), string()}
%% Description: 
%%----------------------------------------------------------------------
check_uri("http://" ++ _ = Url, _CL) ->
    %% An http URL is returned whole, scheme included.
    {http, Url};
check_uri("file://" ++ Path, _CL) ->
    %% A file URL is reduced to what follows "file://".
    {file, Path};
check_uri(Path, CL) -> % ordinary filepath other URI's not supported yet
    %% "file://" already removed when current_location set
    Tag = get_uri_tag(CL),
    PathType = filename:pathtype(Path),
    case {Tag, PathType} of
        {false, relative} ->
            %% Plain directory as current location: resolve against it.
            {file, filename:join(CL, Path)};
        {false, absolute} ->
            {file, filename:absname(Path)};
        {false, volumerelative} -> % only windows
            %% Reuse the volume (text before the first ":") of the current
            %% location.
            [Vol | _] = re:split(CL, ":", [{return,list}]),
            {file, filename:join(Vol ++ ":", Path)};
        {Scheme, _AnyPathType} ->
            %% The current location carries a scheme: the path is appended
            %% to it, whatever its path type.
            {Scheme, CL ++ "/" ++ Path}
    end.

%%----------------------------------------------------------------------
%% Function  : get_uri_tag(Uri) -> Result
%% Parameters: Uri = string()
%% Result    : atom() | false   (the scheme before "://", or false if none)
%% Description: http / file is the only supported URI for the moment
%%----------------------------------------------------------------------
%% Returns the scheme of Uri (the text before the first "://") as an atom,
%% or false when Uri contains no "://".
get_uri_tag(Uri) ->
    case re:split(Uri, "://", [{return,list}]) of
        %% "://" may occur again later in the URI (for instance inside a
        %% query string), which yields more than two parts; only the first
        %% part is the scheme.
        [Tag, _ | _] ->
            %% NOTE(review): list_to_atom/1 creates a new atom for every
            %% distinct scheme; confirm that Uri never comes from untrusted
            %% input, or restrict this to the supported schemes.
            list_to_atom(Tag);
        [_] ->
            false
    end.

%%----------------------------------------------------------------------
%% Function  : http_get_file(Host, Port, Key) -> Result
%% Parameters: Host = string()
%%             Port = integer()
%%             Key = string()
%% Result    : string()
%% Description: 
%%----------------------------------------------------------------------
%% Fetches Key from Host:Port with an HTTP/1.0 GET request and stores the
%% received data in a newly created temporary file, whose name is returned.
%% Throws {error, Reason} if the connection, the request or the transfer
%% fails; in that case the temporary file is closed and deleted again.
http_get_file(Host, Port, Key) ->
    ConnectTimeOut = 10000,
    SendTimeout = 10000,
    FilenameTempl = filename:basename(Key),

    {Filename, FD} = create_tempfile(FilenameTempl),
    try
        %% The connection is set up inside the try so that a failed connect
        %% does not leave the temporary file behind.
        Socket = create_connection(Host, Port, ConnectTimeOut),
        Request = "GET " ++ Key ++ " HTTP/1.0\r\n\r\n",
        case gen_tcp:send(Socket, Request) of
            ok ->
                receive_msg(Socket, FD, true, SendTimeout);
            {error, _Reason} ->
                %% Release the socket before reporting the failure.
                gen_tcp:close(Socket),
                throw({error, lists:flatten(io_lib:format("Couldn't fetch http://~s:~p/~s",
                                                          [Host, Port, Key]))})
        end
    catch
        throw:{error, Error} ->
            file:close(FD),
            file:delete(Filename),
            throw({error, Error})
    end,
    file:close(FD),
    Filename.

%%----------------------------------------------------------------------
%% Function  : receive_msg(Socket, FD, WaitForHeader, Timeout) -> Result
%% Parameters: Socket = io_device()
%%             FD = io_device()
%%             WaitForHeader = boolean()
%%             Timeout = integer()
%% Result    : ok
%% Description: 
%%----------------------------------------------------------------------
%% Receives active-mode tcp messages from Socket and writes the message body
%% to FD until the peer closes the connection. While WaitForHeader is not
%% false, everything up to and including the first "\r\n\r\n" is discarded.
%% Throws {error, string()} on a tcp error or when no message arrives within
%% Timeout milliseconds; the socket is closed first in both cases.
receive_msg(Socket, FD, WaitForHeader, Timeout) ->
    receive_msg(Socket, FD, WaitForHeader, Timeout, <<>>).

%% Pending holds header bytes received so far when the header terminator has
%% not been seen yet; the header may be split over several tcp messages.
receive_msg(Socket, FD, WaitForHeader, Timeout, Pending) ->
    receive
        {tcp_closed, Socket} ->
            ok;
        {tcp, Socket, Response} when WaitForHeader == false ->
            file:write(FD, Response),
            receive_msg(Socket, FD, WaitForHeader, Timeout, Pending);
        {tcp, Socket, Response} ->
            Buffered = <<Pending/binary, Response/binary>>,
            case binary:match(Buffered, <<"\r\n\r\n">>) of
                nomatch ->
                    %% Header still incomplete: keep the bytes and wait for
                    %% more instead of calling remove_header/1, which has no
                    %% clause for data without a terminator.
                    receive_msg(Socket, FD, WaitForHeader, Timeout, Buffered);
                _Found ->
                    MsgBody = remove_header(Buffered),
                    file:write(FD, MsgBody),
                    receive_msg(Socket, FD, false, Timeout, <<>>)
            end;
        {tcp_error, Socket, _Reason} ->
            gen_tcp:close(Socket),
            throw({error, "http connection failed"})
    after Timeout ->
            gen_tcp:close(Socket),
            throw({error, "http connection timedout"})
    end.


%% Drops everything up to and including the first CR LF CR LF sequence and
%% returns the bytes that follow it. There is no clause for input that lacks
%% the sequence.
remove_header(<<$\r, $\n, $\r, $\n, Body/binary>>) ->
    Body;
remove_header(<<_Skipped, Tail/binary>>) ->
    remove_header(Tail).

%%----------------------------------------------------------------------
%% Function  : create_connection(Host, Port, Timeout) -> Result
%% Parameters: Host = string()
%%             Port = integer()
%%             Timeout = integer()
%% Result    : io_device()
%% Description: 
%%----------------------------------------------------------------------
%% Opens a tcp connection to Host:Port that delivers data as binaries, and
%% returns the socket. Throws {error, string()} when the connect fails.
create_connection(Host, Port, Timeout) ->
    SocketOpts = [{packet,0}, binary, {reuseaddr,true}],
    case gen_tcp:connect(Host, Port, SocketOpts, Timeout) of
        {error, Reason} ->
            Message = io_lib:format("Can't connect to ~s:~p ~p\n",
                                    [Host, Port, Reason]),
            throw({error, lists:flatten(Message)});
        {ok, Socket} ->
            Socket
    end.

%%----------------------------------------------------------------------
%% Function  : http(Url) -> Result
%% Parameters: Url = string()
%% Result    : {Host, PortInt, Key}
%% Description: 
%%----------------------------------------------------------------------
%% Splits an http URL into {Host, Port, Key}, where Key is the part from the
%% first "/" after the host (and optional port) to the end of the URL.
%% ?HTTP_DEF_PORT is used when the URL carries no port.
%% Throws {error, string()} for a URL without a key, with an empty host or
%% with a port that is not an integer.
http("http://" ++ Address) ->
    %% Cut off the key first, so that a ":" further on in the path or query
    %% is not mistaken for the host/port separator.
    {Authority, Key} = split_to_slash(Address, []),
    case string:tokens(Authority, ":") of
        [Host, Port] ->
            %% At this stage we know that the address contains a port number.
            try list_to_integer(Port) of
                PortInt ->
                    {Host, PortInt, Key}
            catch
                error:badarg ->
                    throw({error, "Malformed key; port not an integer, should be http://Host:Port/path or http://Host/path"})
            end;
        [Authority] ->
            %% No ":" in the host part: use the default port.
            {Authority, ?HTTP_DEF_PORT, Key};
        _What ->
            throw({error, "Malformed key; should be http://Host:Port/path or http://Host/path"})
    end.

%%----------------------------------------------------------------------
%% Function  : split_to_slash(String, Acc) -> Result
%% Parameters: String = string()
%%             Acc = string()
%% Result    : {string(), string()}
%% Description: 
%%----------------------------------------------------------------------
%% Splits String at its first "/". Returns {Before, FromSlash}, where Before
%% is preceded by the reversed Acc and FromSlash starts with the "/".
%% Throws {error, string()} when String contains no "/".
split_to_slash(String, Acc) ->
    case lists:splitwith(fun(Char) -> Char =/= $/ end, String) of
        {_NoSlash, []} ->
            throw({error, "No Key given Host:Port/Key"});
        {Before, FromSlash} ->
            {lists:reverse(Acc, Before), FromSlash}
    end.


%%----------------------------------------------------------------------
%% Function  : create_tempfile(Template) -> Result
%% Parameters: Template = string()
%% Result    : string()
%% Description: 
%%----------------------------------------------------------------------
%% Creates a new, empty file in the temporary directory and returns
%% {FileName, FD} with the file opened for binary writing. The name is built
%% from the OS process id, Template and a running number.
%% Throws {error, string()} when no temporary directory is found or the file
%% cannot be created.
create_tempfile(Template) ->
    TmpDir =
        case os:type() of
            {unix, _} ->
                case file:read_file_info("/tmp") of
                    {ok, _} ->
                        "/tmp";
                    {error,enoent} ->
                        throw({error, "/tmp doesn't exist"})
                end;
            {win32, _} ->
                %% TMP takes precedence over TEMP.
                case os:getenv("TMP") of
                    false ->
                        case os:getenv("TEMP") of
                            false ->
                                throw({error, "Variabel TMP or TEMP doesn't exist"});
                            P2 ->
                                P2
                        end;
                    P1 ->
                        P1
                end
        end,
    TmpNameBase = filename:join([TmpDir, os:getpid() ++ Template ++ "."]),
    create_tempfile_1(TmpNameBase, 1).

%% Tries TmpNameBase ++ "1", ++ "2", ... until a name is found that does not
%% exist yet.
create_tempfile_1(TmpNameBase, N) ->
    FileName = TmpNameBase ++ integer_to_list(N),
    %% 'exclusive' makes the open fail with eexist instead of truncating a
    %% file that is already there.
    case file:open(FileName, [write, binary, exclusive]) of
        {ok, FD} ->
            {FileName, FD};
        {error, eexist} ->
            create_tempfile_1(TmpNameBase, N+1);
        {error, Reason} ->
            %% Any other error will not go away by picking the next number;
            %% retrying would loop forever.
            throw({error, lists:flatten(io_lib:format("Couldn't create temporary file ~ts: ~p",
                                                      [FileName, Reason]))})
    end.
    

%%----------------------------------------------------------------------
%% Function  : filter_endtag_stack(EndTagStack) -> Result
%% Parameters: EndTagStack = [{term(), string(), string(), 
%%                             term(), nslist(), nslist()}]
%% Result    : [string()]
%% Description: Returns a stack with just local names.
%%----------------------------------------------------------------------
%% Entry point: starts with an empty accumulator.
filter_endtag_stack(EndTagStack) ->
    filter_endtag_stack(EndTagStack, []).

%% Picks the third element out of every 6-tuple on the stack, keeping the
%% stack order, and puts the reversed Acc in front of the result.
filter_endtag_stack(EndTagStack, Acc) ->
    Names = lists:map(fun({_, _, Name, _, _, _}) -> Name end, EndTagStack),
    lists:reverse(Acc, Names).


%%----------------------------------------------------------------------
%% Function  : format_error(Tag, State, Reason) -> Result
%% Parameters: Tag = atom(), 
%%             State = xmerl_sax_parser_state()
%%             Reason = string()
%% Result    : {atom(), {string(), string(), integer()}, string(), [string()], event_state()}
%% Description: Format the resulting error tuple
%%----------------------------------------------------------------------
format_error(Tag, State, Reason) ->
    %% Position information taken from the parser state.
    Location = State#xmerl_sax_parser_state.current_location,
    Entity = State#xmerl_sax_parser_state.entity,
    LineNo = State#xmerl_sax_parser_state.line_no,
    %% The end tag stack is reduced to plain names before it is reported.
    OpenTags = filter_endtag_stack(State#xmerl_sax_parser_state.end_tags),
    EventState = State#xmerl_sax_parser_state.event_state,
    {Tag, {Location, Entity, LineNo}, Reason, OpenTags, EventState}.