From 1f8a70d0860862a8b5d5819f5d9e0240abdbe69e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Hoguin?= <essen@ninenines.eu>
Date: Sat, 20 Dec 2014 02:06:07 +0200
Subject: Add cow_http_hd:parse_content_language/1

From RFC7231 and RFC5646.

The ABNF for language tags is terrible. It makes efficient parsing
a big challenge, and the result is this huge, ugly set of functions.
Thankfully, triq allows us to make sure the implementation is correct.
A large number of examples have also been extracted from both RFCs.

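The various ?IS_ALPHA(C), ?IS_TOKEN(C) and similar macros have
changed: they now use 'orelse' instead of ';'. This is because the
new code needs to check more than one character per clause, and a
macro built with ';' (which separates whole guard sequences) cannot
be combined with ',' to test several characters in a single guard.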

The compilation time for this module increased dramatically. This
apparently happens because the guards are too big. Using ranges
($a =< C, C =< $z) instead of the current solution makes compilation
much faster, but the function then runs twice as slowly, which is
not acceptable.
---
 include/cow_inline.hrl | 70 ++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 53 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/cow_inline.hrl b/include/cow_inline.hrl
index 0fb8b3b..a33b0b9 100644
--- a/include/cow_inline.hrl
+++ b/include/cow_inline.hrl
@@ -18,27 +18,31 @@
 %% IS_ALPHA(Character)
 
 -define(IS_ALPHA(C),
-	C =:= $a; C =:= $b; C =:= $c; C =:= $d; C =:= $e;
-	C =:= $f; C =:= $g; C =:= $h; C =:= $i; C =:= $j;
-	C =:= $k; C =:= $l; C =:= $m; C =:= $n; C =:= $o;
-	C =:= $p; C =:= $q; C =:= $r; C =:= $s; C =:= $t;
-	C =:= $u; C =:= $v; C =:= $w; C =:= $x; C =:= $y;
-	C =:= $z;
-	C =:= $A; C =:= $B; C =:= $C; C =:= $D; C =:= $E;
-	C =:= $F; C =:= $G; C =:= $H; C =:= $I; C =:= $J;
-	C =:= $K; C =:= $L; C =:= $M; C =:= $N; C =:= $O;
-	C =:= $P; C =:= $Q; C =:= $R; C =:= $S; C =:= $T;
-	C =:= $U; C =:= $V; C =:= $W; C =:= $X; C =:= $Y;
+	C =:= $a orelse C =:= $b orelse C =:= $c orelse C =:= $d orelse C =:= $e orelse
+	C =:= $f orelse C =:= $g orelse C =:= $h orelse C =:= $i orelse C =:= $j orelse
+	C =:= $k orelse C =:= $l orelse C =:= $m orelse C =:= $n orelse C =:= $o orelse
+	C =:= $p orelse C =:= $q orelse C =:= $r orelse C =:= $s orelse C =:= $t orelse
+	C =:= $u orelse C =:= $v orelse C =:= $w orelse C =:= $x orelse C =:= $y orelse
+	C =:= $z orelse
+	C =:= $A orelse C =:= $B orelse C =:= $C orelse C =:= $D orelse C =:= $E orelse
+	C =:= $F orelse C =:= $G orelse C =:= $H orelse C =:= $I orelse C =:= $J orelse
+	C =:= $K orelse C =:= $L orelse C =:= $M orelse C =:= $N orelse C =:= $O orelse
+	C =:= $P orelse C =:= $Q orelse C =:= $R orelse C =:= $S orelse C =:= $T orelse
+	C =:= $U orelse C =:= $V orelse C =:= $W orelse C =:= $X orelse C =:= $Y orelse
 	C =:= $Z
 ).
 
 %% IS_DIGIT(Character)
 
 -define(IS_DIGIT(C),
-	C =:= $0; C =:= $1; C =:= $2; C =:= $3; C =:= $4;
-	C =:= $5; C =:= $6; C =:= $7; C =:= $8; C =:= $9
+	C =:= $0 orelse C =:= $1 orelse C =:= $2 orelse C =:= $3 orelse C =:= $4 orelse
+	C =:= $5 orelse C =:= $6 orelse C =:= $7 orelse C =:= $8 orelse C =:= $9
 ).
 
+%% IS_ALPHANUM(Character)
+
+-define(IS_ALPHANUM(C), ?IS_ALPHA(C) orelse ?IS_DIGIT(C)).
+
 %% IS_ETAGC(Character)
 
 -define(IS_ETAGC(C), C =:= 16#21; C >= 16#23, C =/= 16#7f).
@@ -46,16 +50,48 @@
 %% IS_TOKEN(Character)
 
 -define(IS_TOKEN(C),
-	?IS_ALPHA(C); ?IS_DIGIT(C);
-	C =:= $!; C =:= $#; C =:= $$; C =:= $%; C =:= $&;
-	C =:= $'; C =:= $*; C =:= $+; C =:= $-; C =:= $.;
-	C =:= $^; C =:= $_; C =:= $`; C =:= $|; C =:= $~
+	?IS_ALPHA(C) orelse ?IS_DIGIT(C)
+	orelse C =:= $! orelse C =:= $# orelse C =:= $$ orelse C =:= $% orelse C =:= $&
+	orelse C =:= $' orelse C =:= $* orelse C =:= $+ orelse C =:= $- orelse C =:= $.
+	orelse C =:= $^ orelse C =:= $_ orelse C =:= $` orelse C =:= $| orelse C =:= $~
 ).
 
 %% IS_VCHAR(Character)
 
 -define(IS_VCHAR(C), C =:= $\t; C > 31, C =/= 127).
 
+%% LC(Character)
+
+-define(LC(C), case C of
+	$A -> $a;
+	$B -> $b;
+	$C -> $c;
+	$D -> $d;
+	$E -> $e;
+	$F -> $f;
+	$G -> $g;
+	$H -> $h;
+	$I -> $i;
+	$J -> $j;
+	$K -> $k;
+	$L -> $l;
+	$M -> $m;
+	$N -> $n;
+	$O -> $o;
+	$P -> $p;
+	$Q -> $q;
+	$R -> $r;
+	$S -> $s;
+	$T -> $t;
+	$U -> $u;
+	$V -> $v;
+	$W -> $w;
+	$X -> $x;
+	$Y -> $y;
+	$Z -> $z;
+	_ -> C
+end).
+
 %% INLINE_LOWERCASE(Function, Rest, Acc, ...)
 %%
 %% To be included at the end of a case block.
-- 
cgit v1.2.3