diff options
-rw-r--r-- | .gitignore | 8 | ||||
-rw-r--r-- | AUTHORS | 4 | ||||
-rw-r--r-- | CHANGELOG.md | 12 | ||||
-rw-r--r-- | LICENSE | 2 | ||||
-rw-r--r-- | Makefile | 16 | ||||
-rwxr-xr-x | all.sh | 17 | ||||
-rw-r--r-- | build.config | 20 | ||||
-rw-r--r-- | erlang.mk | 1335 | ||||
-rw-r--r-- | include/cow_inline.hrl | 391 | ||||
-rw-r--r-- | include/cow_parse.hrl | 79 | ||||
-rw-r--r-- | src/cow_cookie.erl | 27 | ||||
-rw-r--r-- | src/cow_date.erl | 193 | ||||
-rw-r--r-- | src/cow_http.erl | 263 | ||||
-rw-r--r-- | src/cow_http_hd.erl | 3019 | ||||
-rw-r--r-- | src/cow_http_te.erl | 322 | ||||
-rw-r--r-- | src/cow_mimetypes.erl | 2 | ||||
-rw-r--r-- | src/cow_mimetypes.erl.src | 2 | ||||
-rw-r--r-- | src/cow_multipart.erl | 779 | ||||
-rw-r--r-- | src/cow_qs.erl | 10 | ||||
-rw-r--r-- | src/cow_spdy.erl | 4 | ||||
-rw-r--r-- | src/cow_ws.erl | 599 | ||||
-rw-r--r-- | src/cowlib.app.src | 8 | ||||
-rw-r--r-- | test/eunit_SUITE.erl | 31 |
23 files changed, 6846 insertions, 297 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..66ce28d --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +.cowlib.plt +.erlang.mk.packages.* +_rel +deps +ebin +logs +relx +test/*.beam @@ -0,0 +1,4 @@ +Cowlib is available thanks to the work of: + +Loïc Hoguin +Mikkel Jensen diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..88146e9 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,12 @@ +CHANGELOG +========= + +1.0.1 +----- + + * Multipart: no line break after close delimiter + +1.0.0 +----- + + * Initial release. @@ -1,4 +1,4 @@ -Copyright (c) 2013, Loïc Hoguin <[email protected]> +Copyright (c) 2013-2015, Loïc Hoguin <[email protected]> Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above @@ -1,7 +1,12 @@ # See LICENSE for licensing information. PROJECT = cowlib -CT_SUITES = eunit +#ERLC_OPTS += +bin_opt_info +TEST_ERLC_OPTS += +'{parse_transform, eunit_autoexport}' +'{parse_transform, horse_autoexport}' +PLT_APPS = crypto + +TEST_DEPS = triq +dep_triq = git https://github.com/krestenkrab/triq master include erlang.mk @@ -32,12 +37,15 @@ gen: # Performance testing. +ifeq ($(MAKECMDGOALS),perfs) +.NOTPARALLEL: +endif + deps/horse: git clone -n -- https://github.com/extend/horse $(DEPS_DIR)/horse cd $(DEPS_DIR)/horse ; git checkout -q master $(MAKE) -C $(DEPS_DIR)/horse -perfs: ERLC_OPTS += -DPERF=1 +'{parse_transform, horse_autoexport}' -perfs: clean deps deps/horse app +perfs: test-build $(gen_verbose) erl -noshell -pa ebin deps/horse/ebin \ - -eval 'horse:app_perf($(PROJECT)), init:stop().' + -eval 'horse:app_perf($(PROJECT)), erlang:halt().' @@ -0,0 +1,17 @@ +#!/bin/sh + +KERL_INSTALL_PATH=~/erlang +KERL_RELEASES="r15b r15b01 r15b02 r15b03 r16b r16b01 r16b02 r16b03-1 17.0 17.1.2" + +make build-ct-suites + +for rel in $KERL_RELEASES +do + echo + echo " TESTING $rel" + echo + . 
$KERL_INSTALL_PATH/$rel/activate + CT_OPTS="-label $rel" make tests +done + +xdg-open logs/all_runs.html diff --git a/build.config b/build.config new file mode 100644 index 0000000..87fd50d --- /dev/null +++ b/build.config @@ -0,0 +1,20 @@ +# Core modules. +# +# Do *not* comment or remove them +# unless you know what you are doing! +core/core +core/deps +core/erlc + +# Plugins. +# +# Comment to disable, uncomment to enable. +plugins/bootstrap +#plugins/c_src +plugins/ct +plugins/dialyzer +#plugins/edoc +plugins/elvis +#plugins/erlydtl +#plugins/relx +plugins/shell @@ -1,4 +1,4 @@ -# Copyright (c) 2013, Loïc Hoguin <[email protected]> +# Copyright (c) 2013-2015, Loïc Hoguin <[email protected]> # # Permission to use, copy, modify, and/or distribute this software for any # purpose with or without fee is hereby granted, provided that the above @@ -12,72 +12,106 @@ # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. -# Project. +.PHONY: all deps app rel docs tests clean distclean help erlang-mk -PROJECT ?= $(notdir $(CURDIR)) - -# Packages database file. - -PKG_FILE ?= $(CURDIR)/.erlang.mk.packages.v1 -export PKG_FILE +ERLANG_MK_VERSION = 1 -PKG_FILE_URL ?= https://raw.github.com/extend/erlang.mk/master/packages.v1.tsv +# Core configuration. -define get_pkg_file - wget --no-check-certificate -O $(PKG_FILE) $(PKG_FILE_URL) || rm $(PKG_FILE) -endef +PROJECT ?= $(notdir $(CURDIR)) +PROJECT := $(strip $(PROJECT)) -# Verbosity and tweaks. +# Verbosity. V ?= 0 -appsrc_verbose_0 = @echo " APP " $(PROJECT).app.src; -appsrc_verbose = $(appsrc_verbose_$(V)) +gen_verbose_0 = @echo " GEN " $@; +gen_verbose = $(gen_verbose_$(V)) -erlc_verbose_0 = @echo " ERLC " $(filter %.erl %.core,$(?F)); -erlc_verbose = $(erlc_verbose_$(V)) +# "erl" command. 
-xyrl_verbose_0 = @echo " XYRL " $(filter %.xrl %.yrl,$(?F)); -xyrl_verbose = $(xyrl_verbose_$(V)) +ERL = erl +A0 -noinput -boot start_clean -dtl_verbose_0 = @echo " DTL " $(filter %.dtl,$(?F)); -dtl_verbose = $(dtl_verbose_$(V)) +# Core targets. -gen_verbose_0 = @echo " GEN " $@; -gen_verbose = $(gen_verbose_$(V)) +ifneq ($(words $(MAKECMDGOALS)),1) +.NOTPARALLEL: +endif -.PHONY: rel clean-rel all clean-all app clean deps clean-deps \ - docs clean-docs build-tests tests build-plt dialyze +all:: deps + @$(MAKE) --no-print-directory app + @$(MAKE) --no-print-directory rel -# Release. +# Noop to avoid a Make warning when there's nothing to do. +rel:: + @echo -n -RELX_CONFIG ?= $(CURDIR)/relx.config +clean:: clean-crashdump -ifneq ($(wildcard $(RELX_CONFIG)),) +clean-crashdump: +ifneq ($(wildcard erl_crash.dump),) + $(gen_verbose) rm -f erl_crash.dump +endif -RELX ?= $(CURDIR)/relx -export RELX +distclean:: clean + +help:: + @printf "%s\n" \ + "erlang.mk (version $(ERLANG_MK_VERSION)) is distributed under the terms of the ISC License." \ + "Copyright (c) 2013-2014 Loïc Hoguin <[email protected]>" \ + "" \ + "Usage: [V=1] make [-jNUM] [target]" \ + "" \ + "Core targets:" \ + " all Run deps, app and rel targets in that order" \ + " deps Fetch dependencies (if needed) and compile them" \ + " app Compile the project" \ + " rel Build a release for this project, if applicable" \ + " docs Build the documentation for this project" \ + " tests Run the tests for this project" \ + " clean Delete temporary and output files from most targets" \ + " distclean Delete all temporary and output files" \ + " help Display this help and exit" \ + "" \ + "The target clean only removes files that are commonly removed." \ + "Dependencies and releases are left untouched." \ + "" \ + "Setting V=1 when calling make enables verbose mode." \ + "Parallel execution is supported through the -j Make flag." + +# Core functions. 
+ +ifeq ($(shell which wget 2>/dev/null | wc -l), 1) +define core_http_get + wget --no-check-certificate -O $(1) $(2)|| rm $(1) +endef +else +define core_http_get + $(ERL) -eval 'ssl:start(), inets:start(), case httpc:request(get, {"$(2)", []}, [{autoredirect, true}], []) of {ok, {{_, 200, _}, _, Body}} -> case file:write_file("$(1)", Body) of ok -> ok; {error, R1} -> halt(R1) end; {error, R2} -> halt(R2) end, halt(0).' +endef +endif -RELX_URL ?= https://github.com/erlware/relx/releases/download/v0.5.2/relx -RELX_OPTS ?= +# Automated update. -define get_relx - wget -O $(RELX) $(RELX_URL) || rm $(RELX) - chmod +x $(RELX) -endef +ERLANG_MK_BUILD_CONFIG ?= build.config +ERLANG_MK_BUILD_DIR ?= .erlang.mk.build -rel: clean-rel all $(RELX) - @$(RELX) -c $(RELX_CONFIG) $(RELX_OPTS) +erlang-mk: + git clone https://github.com/ninenines/erlang.mk $(ERLANG_MK_BUILD_DIR) + if [ -f $(ERLANG_MK_BUILD_CONFIG) ]; then cp $(ERLANG_MK_BUILD_CONFIG) $(ERLANG_MK_BUILD_DIR); fi + cd $(ERLANG_MK_BUILD_DIR) && make + cp $(ERLANG_MK_BUILD_DIR)/erlang.mk ./erlang.mk + rm -rf $(ERLANG_MK_BUILD_DIR) -$(RELX): - @$(call get_relx) +# Copyright (c) 2013-2015, Loïc Hoguin <[email protected]> +# This file is part of erlang.mk and subject to the terms of the ISC License. -clean-rel: - @rm -rf _rel +.PHONY: distclean-deps distclean-pkg pkg-list pkg-search -endif +# Configuration. -# Deps directory. +AUTOPATCH ?= edown gen_leader gproc +export AUTOPATCH DEPS_DIR ?= $(CURDIR)/deps export DEPS_DIR @@ -86,33 +120,216 @@ REBAR_DEPS_DIR = $(DEPS_DIR) export REBAR_DEPS_DIR ALL_DEPS_DIRS = $(addprefix $(DEPS_DIR)/,$(DEPS)) -ALL_TEST_DEPS_DIRS = $(addprefix $(DEPS_DIR)/,$(TEST_DEPS)) -# Application. 
- -ERL_LIBS ?= $(DEPS_DIR) +ifeq ($(filter $(DEPS_DIR),$(subst :, ,$(ERL_LIBS))),) +ifeq ($(ERL_LIBS),) + ERL_LIBS = $(DEPS_DIR) +else + ERL_LIBS := $(ERL_LIBS):$(DEPS_DIR) +endif +endif export ERL_LIBS -ERLC_OPTS ?= -Werror +debug_info +warn_export_all +warn_export_vars \ - +warn_shadow_vars +warn_obsolete_guard # +bin_opt_info +warn_missing_spec +PKG_FILE2 ?= $(CURDIR)/.erlang.mk.packages.v2 +export PKG_FILE2 + +PKG_FILE_URL ?= https://raw.githubusercontent.com/ninenines/erlang.mk/master/packages.v2.tsv + +# Core targets. + +deps:: $(ALL_DEPS_DIRS) + @for dep in $(ALL_DEPS_DIRS) ; do \ + if [ -f $$dep/GNUmakefile ] || [ -f $$dep/makefile ] || [ -f $$dep/Makefile ] ; then \ + $(MAKE) -C $$dep ; \ + else \ + echo "include $(CURDIR)/erlang.mk" | ERLC_OPTS=+debug_info $(MAKE) -f - -C $$dep ; \ + fi ; \ + done + +distclean:: distclean-deps distclean-pkg + +# Deps related targets. + +define dep_autopatch + $(ERL) -eval " \ +DepDir = \"$(DEPS_DIR)/$(1)/\", \ +fun() -> \ + {ok, Conf} = file:consult(DepDir ++ \"rebar.config\"), \ + File = case lists:keyfind(deps, 1, Conf) of false -> []; {_, Deps} -> \ + [begin {Method, Repo, Commit} = case Repos of \ + {git, R} -> {git, R, master}; \ + {M, R, {branch, C}} -> {M, R, C}; \ + {M, R, {tag, C}} -> {M, R, C}; \ + {M, R, C} -> {M, R, C} \ + end, \ + io_lib:format(\"DEPS += ~s\ndep_~s = ~s ~s ~s~n\", [Name, Name, Method, Repo, Commit]) \ + end || {Name, _, Repos} <- Deps] \ + end, \ + ok = file:write_file(\"$(DEPS_DIR)/$(1)/Makefile\", [\"ERLC_OPTS = +debug_info\n\n\", File, \"\ninclude erlang.mk\"]) \ +end(), \ +AppSrcOut = \"$(DEPS_DIR)/$(1)/src/$(1).app.src\", \ +AppSrcIn = case filelib:is_regular(AppSrcOut) of false -> \"$(DEPS_DIR)/$(1)/ebin/$(1).app\"; true -> AppSrcOut end, \ +fun() -> \ + {ok, [{application, $(1), L}]} = file:consult(AppSrcIn), \ + L2 = case lists:keyfind(modules, 1, L) of {_, _} -> L; false -> [{modules, []}|L] end, \ + L3 = case lists:keyfind(vsn, 1, L2) of {vsn, git} -> lists:keyreplace(vsn, 1, L2, 
{vsn, \"git\"}); _ -> L2 end, \ + ok = file:write_file(AppSrcOut, io_lib:format(\"~p.~n\", [{application, $(1), L3}])) \ +end(), \ +case AppSrcOut of AppSrcIn -> ok; _ -> ok = file:delete(AppSrcIn) end, \ +halt()." +endef + +ifeq ($(V),0) +define dep_autopatch_verbose + @echo " PATCH " $(1); +endef +endif + +define dep_fetch + if [ "$$$$VS" = "git" ]; then \ + git clone -n -- $$$$REPO $(DEPS_DIR)/$(1); \ + cd $(DEPS_DIR)/$(1) && git checkout -q $$$$COMMIT; \ + elif [ "$$$$VS" = "hg" ]; then \ + hg clone -U $$$$REPO $(DEPS_DIR)/$(1); \ + cd $(DEPS_DIR)/$(1) && hg update -q $$$$COMMIT; \ + elif [ "$$$$VS" = "svn" ]; then \ + svn checkout $$$$REPO $(DEPS_DIR)/$(1); \ + else \ + echo "Unknown or invalid dependency: $(1). Please consult the erlang.mk README for instructions." >&2; \ + exit 78; \ + fi +endef + +define dep_target +$(DEPS_DIR)/$(1): + @mkdir -p $(DEPS_DIR) +ifeq (,$(dep_$(1))) + @if [ ! -f $(PKG_FILE2) ]; then $(call core_http_get,$(PKG_FILE2),$(PKG_FILE_URL)); fi + @DEPPKG=$$$$(awk 'BEGIN { FS = "\t" }; $$$$1 == "$(1)" { print $$$$2 " " $$$$3 " " $$$$4 }' $(PKG_FILE2);); \ + VS=$$$$(echo $$$$DEPPKG | cut -d " " -f1); \ + REPO=$$$$(echo $$$$DEPPKG | cut -d " " -f2); \ + COMMIT=$$$$(echo $$$$DEPPKG | cut -d " " -f3); \ + $(call dep_fetch,$(1)) +else + @VS=$(word 1,$(dep_$(1))); \ + REPO=$(word 2,$(dep_$(1))); \ + COMMIT=$(word 3,$(dep_$(1))); \ + $(call dep_fetch,$(1)) +endif +ifneq ($(filter $(1),$(AUTOPATCH)),) + $(call dep_autopatch_verbose,$(1)) if [ -f $(DEPS_DIR)/$(1)/rebar.config ]; then \ + $(call dep_autopatch,$(1)); \ + cd $(DEPS_DIR)/$(1)/ && ln -s ../../erlang.mk; \ + elif [ ! -f $(DEPS_DIR)/$(1)/Makefile ]; then \ + echo "ERLC_OPTS = +debug_info\ninclude erlang.mk" > $(DEPS_DIR)/$(1)/Makefile; \ + cd $(DEPS_DIR)/$(1)/ && ln -s ../../erlang.mk; \ + fi +endif +endef + +$(foreach dep,$(DEPS),$(eval $(call dep_target,$(dep)))) + +distclean-deps: + $(gen_verbose) rm -rf $(DEPS_DIR) + +# Packages related targets. 
+ +$(PKG_FILE2): + @$(call core_http_get,$(PKG_FILE2),$(PKG_FILE_URL)) + +pkg-list: $(PKG_FILE2) + @cat $(PKG_FILE2) | awk 'BEGIN { FS = "\t" }; { print \ + "Name:\t\t" $$1 "\n" \ + "Repository:\t" $$3 "\n" \ + "Website:\t" $$5 "\n" \ + "Description:\t" $$6 "\n" }' + +ifdef q +pkg-search: $(PKG_FILE2) + @cat $(PKG_FILE2) | grep -i ${q} | awk 'BEGIN { FS = "\t" }; { print \ + "Name:\t\t" $$1 "\n" \ + "Repository:\t" $$3 "\n" \ + "Website:\t" $$5 "\n" \ + "Description:\t" $$6 "\n" }' +else +pkg-search: + $(error Usage: make pkg-search q=STRING) +endif + +ifeq ($(PKG_FILE2),$(CURDIR)/.erlang.mk.packages.v2) +distclean-pkg: + $(gen_verbose) rm -f $(PKG_FILE2) +endif + +help:: + @printf "%s\n" "" \ + "Package-related targets:" \ + " pkg-list List all known packages" \ + " pkg-search q=STRING Search for STRING in the package index" + +# Copyright (c) 2013-2015, Loïc Hoguin <[email protected]> +# This file is part of erlang.mk and subject to the terms of the ISC License. + +.PHONY: clean-app + +# Configuration. + +ERLC_OPTS ?= -Werror +debug_info +warn_export_vars +warn_shadow_vars \ + +warn_obsolete_guard # +bin_opt_info +warn_export_all +warn_missing_spec COMPILE_FIRST ?= COMPILE_FIRST_PATHS = $(addprefix src/,$(addsuffix .erl,$(COMPILE_FIRST))) +ERLC_EXCLUDE ?= +ERLC_EXCLUDE_PATHS = $(addprefix src/,$(addsuffix .erl,$(ERLC_EXCLUDE))) + +ERLC_MIB_OPTS ?= +COMPILE_MIB_FIRST ?= +COMPILE_MIB_FIRST_PATHS = $(addprefix mibs/,$(addsuffix .mib,$(COMPILE_MIB_FIRST))) + +# Verbosity. 
+ +appsrc_verbose_0 = @echo " APP " $(PROJECT).app.src; +appsrc_verbose = $(appsrc_verbose_$(V)) + +erlc_verbose_0 = @echo " ERLC " $(filter-out $(patsubst %,%.erl,$(ERLC_EXCLUDE)),\ + $(filter %.erl %.core,$(?F))); +erlc_verbose = $(erlc_verbose_$(V)) + +xyrl_verbose_0 = @echo " XYRL " $(filter %.xrl %.yrl,$(?F)); +xyrl_verbose = $(xyrl_verbose_$(V)) -all: deps app +mib_verbose_0 = @echo " MIB " $(filter %.bin %.mib,$(?F)); +mib_verbose = $(mib_verbose_$(V)) -clean-all: clean clean-deps clean-docs - $(gen_verbose) rm -rf .$(PROJECT).plt $(DEPS_DIR) logs +# Targets. -app: ebin/$(PROJECT).app +ifeq ($(wildcard ebin/test),) +app:: app-build +else +app:: clean app-build +endif + +app-build: erlc-include ebin/$(PROJECT).app $(eval MODULES := $(shell find ebin -type f -name \*.beam \ - | sed 's/ebin\///;s/\.beam/,/' | sed '$$s/.$$//')) + | sed "s/ebin\//'/;s/\.beam/',/" | sed '$$s/.$$//')) + @if [ -z "$$(grep -E '^[^%]*{modules,' src/$(PROJECT).app.src)" ]; then \ + echo "Empty modules entry not found in $(PROJECT).app.src. Please consult the erlang.mk README for instructions." 
>&2; \ + exit 1; \ + fi + $(eval GITDESCRIBE := $(shell git describe --dirty --abbrev=7 --tags --always --first-parent 2>/dev/null || true)) $(appsrc_verbose) cat src/$(PROJECT).app.src \ - | sed 's/{modules,[[:space:]]*\[\]}/{modules, \[$(MODULES)\]}/' \ + | sed "s/{modules,[[:space:]]*\[\]}/{modules, \[$(MODULES)\]}/" \ + | sed "s/{id,[[:space:]]*\"git\"}/{id, \"$(GITDESCRIBE)\"}/" \ > ebin/$(PROJECT).app +erlc-include: + -@if [ -d ebin/ ]; then \ + find include/ src/ -type f -name \*.hrl -newer ebin -exec touch $(shell find src/ -type f -name "*.erl") \; 2>/dev/null || printf ''; \ + fi + define compile_erl $(erlc_verbose) erlc -v $(ERLC_OPTS) -o ebin/ \ - -pa ebin/ -I include/ $(COMPILE_FIRST_PATHS) $(1) + -pa ebin/ -I include/ $(filter-out $(ERLC_EXCLUDE_PATHS),\ + $(COMPILE_FIRST_PATHS) $(1)) endef define compile_xyrl @@ -121,160 +338,938 @@ define compile_xyrl @rm ebin/*.erl endef -define compile_dtl - $(dtl_verbose) erl -noshell -pa ebin/ $(DEPS_DIR)/erlydtl/ebin/ -eval ' \ - Compile = fun(F) -> \ - Module = list_to_atom( \ - string:to_lower(filename:basename(F, ".dtl")) ++ "_dtl"), \ - erlydtl_compiler:compile(F, Module, [{out_dir, "ebin/"}]) \ - end, \ - _ = [Compile(F) || F <- string:tokens("$(1)", " ")], \ - init:stop()' +define compile_mib + $(mib_verbose) erlc -v $(ERLC_MIB_OPTS) -o priv/mibs/ \ + -I priv/mibs/ $(COMPILE_MIB_FIRST_PATHS) $(1) + $(mib_verbose) erlc -o include/ -- priv/mibs/*.bin endef -ebin/$(PROJECT).app: $(shell find src -type f -name \*.erl) \ - $(shell find src -type f -name \*.core) \ - $(shell find src -type f -name \*.xrl) \ - $(shell find src -type f -name \*.yrl) \ - $(shell find templates -type f -name \*.dtl 2>/dev/null) +ifneq ($(wildcard src/),) +ebin/$(PROJECT).app:: @mkdir -p ebin/ - $(if $(strip $(filter %.erl %.core,$?)), \ - $(call compile_erl,$(filter %.erl %.core,$?))) - $(if $(strip $(filter %.xrl %.yrl,$?)), \ - $(call compile_xyrl,$(filter %.xrl %.yrl,$?))) - $(if $(strip $(filter %.dtl,$?)), \ - $(call 
compile_dtl,$(filter %.dtl,$?))) -clean: - $(gen_verbose) rm -rf ebin/ test/*.beam erl_crash.dump +ifneq ($(wildcard mibs/),) +ebin/$(PROJECT).app:: $(shell find mibs -type f -name \*.mib) + @mkdir -p priv/mibs/ include + $(if $(strip $?),$(call compile_mib,$?)) +endif -# Dependencies. +ebin/$(PROJECT).app:: $(shell find src -type f -name \*.erl) \ + $(shell find src -type f -name \*.core) + $(if $(strip $?),$(call compile_erl,$?)) -define get_dep - @mkdir -p $(DEPS_DIR) -ifeq (,$(findstring pkg://,$(word 1,$(dep_$(1))))) - git clone -n -- $(word 1,$(dep_$(1))) $(DEPS_DIR)/$(1) -else - @if [ ! -f $(PKG_FILE) ]; then $(call get_pkg_file); fi - git clone -n -- `awk 'BEGIN { FS = "\t" }; \ - $$$$1 == "$(subst pkg://,,$(word 1,$(dep_$(1))))" { print $$$$2 }' \ - $(PKG_FILE)` $(DEPS_DIR)/$(1) +ebin/$(PROJECT).app:: $(shell find src -type f -name \*.xrl) \ + $(shell find src -type f -name \*.yrl) + $(if $(strip $?),$(call compile_xyrl,$?)) endif - cd $(DEPS_DIR)/$(1) ; git checkout -q $(word 2,$(dep_$(1))) -endef -define dep_target -$(DEPS_DIR)/$(1): - $(call get_dep,$(1)) -endef +clean:: clean-app -$(foreach dep,$(DEPS),$(eval $(call dep_target,$(dep)))) +clean-app: + $(gen_verbose) rm -rf ebin/ priv/mibs/ \ + $(addprefix include/,$(addsuffix .hrl,$(notdir $(basename $(wildcard mibs/*.mib))))) -deps: $(ALL_DEPS_DIRS) - @for dep in $(ALL_DEPS_DIRS) ; do \ - if [ -f $$dep/Makefile ] ; then \ - $(MAKE) -C $$dep ; \ - else \ - echo "include $(CURDIR)/erlang.mk" | $(MAKE) -f - -C $$dep ; \ - fi ; \ - done +# Copyright (c) 2015, Loïc Hoguin <[email protected]> +# This file is part of erlang.mk and subject to the terms of the ISC License. -clean-deps: - @for dep in $(ALL_DEPS_DIRS) ; do \ - if [ -f $$dep/Makefile ] ; then \ - $(MAKE) -C $$dep clean ; \ - else \ - echo "include $(CURDIR)/erlang.mk" | $(MAKE) -f - -C $$dep clean ; \ - fi ; \ - done +.PHONY: test-deps test-dir test-build clean-test-dir -# Documentation. +# Configuration. 
-EDOC_OPTS ?= +TEST_DIR ?= test -docs: clean-docs - $(gen_verbose) erl -noshell \ - -eval 'edoc:application($(PROJECT), ".", [$(EDOC_OPTS)]), init:stop().' +ALL_TEST_DEPS_DIRS = $(addprefix $(DEPS_DIR)/,$(TEST_DEPS)) -clean-docs: - $(gen_verbose) rm -f doc/*.css doc/*.html doc/*.png doc/edoc-info +TEST_ERLC_OPTS ?= +debug_info +warn_export_vars +warn_shadow_vars +warn_obsolete_guard +TEST_ERLC_OPTS += -DTEST=1 -# Tests. +# Targets. $(foreach dep,$(TEST_DEPS),$(eval $(call dep_target,$(dep)))) -build-test-deps: $(ALL_TEST_DEPS_DIRS) +test-deps: $(ALL_TEST_DEPS_DIRS) @for dep in $(ALL_TEST_DEPS_DIRS) ; do $(MAKE) -C $$dep; done -build-tests: build-test-deps - $(gen_verbose) erlc -v $(ERLC_OPTS) -o test/ \ - $(wildcard test/*.erl test/*/*.erl) -pa ebin/ +ifneq ($(strip $(TEST_DIR)),) +test-dir: + $(gen_verbose) erlc -v $(TEST_ERLC_OPTS) -I include/ -o $(TEST_DIR) \ + $(wildcard $(TEST_DIR)/*.erl $(TEST_DIR)/*/*.erl) -pa ebin/ +endif + +ifeq ($(wildcard ebin/test),) +test-build: ERLC_OPTS=$(TEST_ERLC_OPTS) +test-build: clean deps test-deps + @$(MAKE) --no-print-directory app-build test-dir ERLC_OPTS="$(TEST_ERLC_OPTS)" + $(gen_verbose) touch ebin/test +else +test-build: ERLC_OPTS=$(TEST_ERLC_OPTS) +test-build: deps test-deps + @$(MAKE) --no-print-directory app-build test-dir ERLC_OPTS="$(TEST_ERLC_OPTS)" +endif + +clean:: clean-test-dir + +clean-test-dir: +ifneq ($(wildcard $(TEST_DIR)/*.beam),) + $(gen_verbose) rm -f $(TEST_DIR)/*.beam +endif + +# Copyright (c) 2014-2015, Loïc Hoguin <[email protected]> +# This file is part of erlang.mk and subject to the terms of the ISC License. + +.PHONY: bootstrap bootstrap-lib bootstrap-rel new list-templates + +# Core targets. 
+ +help:: + @printf "%s\n" "" \ + "Bootstrap targets:" \ + " bootstrap Generate a skeleton of an OTP application" \ + " bootstrap-lib Generate a skeleton of an OTP library" \ + " bootstrap-rel Generate the files needed to build a release" \ + " new t=TPL n=NAME Generate a module NAME based on the template TPL" \ + " list-templates List available templates" + +# Bootstrap templates. + +bs_appsrc = "{application, $(PROJECT), [" \ + " {description, \"\"}," \ + " {vsn, \"0.1.0\"}," \ + " {id, \"git\"}," \ + " {modules, []}," \ + " {registered, []}," \ + " {applications, [" \ + " kernel," \ + " stdlib" \ + " ]}," \ + " {mod, {$(PROJECT)_app, []}}," \ + " {env, []}" \ + "]}." +bs_appsrc_lib = "{application, $(PROJECT), [" \ + " {description, \"\"}," \ + " {vsn, \"0.1.0\"}," \ + " {id, \"git\"}," \ + " {modules, []}," \ + " {registered, []}," \ + " {applications, [" \ + " kernel," \ + " stdlib" \ + " ]}" \ + "]}." +bs_Makefile = "PROJECT = $(PROJECT)" \ + "include erlang.mk" +bs_app = "-module($(PROJECT)_app)." \ + "-behaviour(application)." \ + "" \ + "-export([start/2])." \ + "-export([stop/1])." \ + "" \ + "start(_Type, _Args) ->" \ + " $(PROJECT)_sup:start_link()." \ + "" \ + "stop(_State) ->" \ + " ok." +bs_relx_config = "{release, {$(PROJECT)_release, \"1\"}, [$(PROJECT)]}." \ + "{extended_start_script, true}." \ + "{sys_config, \"rel/sys.config\"}." \ + "{vm_args, \"rel/vm.args\"}." +bs_sys_config = "[" \ + "]." +bs_vm_args = "-name $(PROJECT)@127.0.0.1" \ + "-setcookie $(PROJECT)" \ + "-heart" +# Normal templates. +tpl_supervisor = "-module($(n))." \ + "-behaviour(supervisor)." \ + "" \ + "-export([start_link/0])." \ + "-export([init/1])." \ + "" \ + "start_link() ->" \ + " supervisor:start_link({local, ?MODULE}, ?MODULE, [])." \ + "" \ + "init([]) ->" \ + " Procs = []," \ + " {ok, {{one_for_one, 1, 5}, Procs}}." +tpl_gen_server = "-module($(n))." \ + "-behaviour(gen_server)." \ + "" \ + "%% API." \ + "-export([start_link/0])." \ + "" \ + "%% gen_server." 
\ + "-export([init/1])." \ + "-export([handle_call/3])." \ + "-export([handle_cast/2])." \ + "-export([handle_info/2])." \ + "-export([terminate/2])." \ + "-export([code_change/3])." \ + "" \ + "-record(state, {" \ + "})." \ + "" \ + "%% API." \ + "" \ + "-spec start_link() -> {ok, pid()}." \ + "start_link() ->" \ + " gen_server:start_link(?MODULE, [], [])." \ + "" \ + "%% gen_server." \ + "" \ + "init([]) ->" \ + " {ok, \#state{}}." \ + "" \ + "handle_call(_Request, _From, State) ->" \ + " {reply, ignored, State}." \ + "" \ + "handle_cast(_Msg, State) ->" \ + " {noreply, State}." \ + "" \ + "handle_info(_Info, State) ->" \ + " {noreply, State}." \ + "" \ + "terminate(_Reason, _State) ->" \ + " ok." \ + "" \ + "code_change(_OldVsn, State, _Extra) ->" \ + " {ok, State}." +tpl_gen_fsm = "-module($(n))." \ + "-behaviour(gen_fsm)." \ + "" \ + "%% API." \ + "-export([start_link/0])." \ + "" \ + "%% gen_fsm." \ + "-export([init/1])." \ + "-export([state_name/2])." \ + "-export([handle_event/3])." \ + "-export([state_name/3])." \ + "-export([handle_sync_event/4])." \ + "-export([handle_info/3])." \ + "-export([terminate/3])." \ + "-export([code_change/4])." \ + "" \ + "-record(state, {" \ + "})." \ + "" \ + "%% API." \ + "" \ + "-spec start_link() -> {ok, pid()}." \ + "start_link() ->" \ + " gen_fsm:start_link(?MODULE, [], [])." \ + "" \ + "%% gen_fsm." \ + "" \ + "init([]) ->" \ + " {ok, state_name, \#state{}}." \ + "" \ + "state_name(_Event, StateData) ->" \ + " {next_state, state_name, StateData}." \ + "" \ + "handle_event(_Event, StateName, StateData) ->" \ + " {next_state, StateName, StateData}." \ + "" \ + "state_name(_Event, _From, StateData) ->" \ + " {reply, ignored, state_name, StateData}." \ + "" \ + "handle_sync_event(_Event, _From, StateName, StateData) ->" \ + " {reply, ignored, StateName, StateData}." \ + "" \ + "handle_info(_Info, StateName, StateData) ->" \ + " {next_state, StateName, StateData}." 
\ + "" \ + "terminate(_Reason, _StateName, _StateData) ->" \ + " ok." \ + "" \ + "code_change(_OldVsn, StateName, StateData, _Extra) ->" \ + " {ok, StateName, StateData}." +tpl_cowboy_http = "-module($(n))." \ + "-behaviour(cowboy_http_handler)." \ + "" \ + "-export([init/3])." \ + "-export([handle/2])." \ + "-export([terminate/3])." \ + "" \ + "-record(state, {" \ + "})." \ + "" \ + "init(_, Req, _Opts) ->" \ + " {ok, Req, \#state{}}." \ + "" \ + "handle(Req, State=\#state{}) ->" \ + " {ok, Req2} = cowboy_req:reply(200, Req)," \ + " {ok, Req2, State}." \ + "" \ + "terminate(_Reason, _Req, _State) ->" \ + " ok." +tpl_cowboy_loop = "-module($(n))." \ + "-behaviour(cowboy_loop_handler)." \ + "" \ + "-export([init/3])." \ + "-export([info/3])." \ + "-export([terminate/3])." \ + "" \ + "-record(state, {" \ + "})." \ + "" \ + "init(_, Req, _Opts) ->" \ + " {loop, Req, \#state{}, 5000, hibernate}." \ + "" \ + "info(_Info, Req, State) ->" \ + " {loop, Req, State, hibernate}." \ + "" \ + "terminate(_Reason, _Req, _State) ->" \ + " ok." +tpl_cowboy_rest = "-module($(n))." \ + "" \ + "-export([init/3])." \ + "-export([content_types_provided/2])." \ + "-export([get_html/2])." \ + "" \ + "init(_, _Req, _Opts) ->" \ + " {upgrade, protocol, cowboy_rest}." \ + "" \ + "content_types_provided(Req, State) ->" \ + " {[{{<<\"text\">>, <<\"html\">>, '*'}, get_html}], Req, State}." \ + "" \ + "get_html(Req, State) ->" \ + " {<<\"<html><body>This is REST!</body></html>\">>, Req, State}." +tpl_cowboy_ws = "-module($(n))." \ + "-behaviour(cowboy_websocket_handler)." \ + "" \ + "-export([init/3])." \ + "-export([websocket_init/3])." \ + "-export([websocket_handle/3])." \ + "-export([websocket_info/3])." \ + "-export([websocket_terminate/3])." \ + "" \ + "-record(state, {" \ + "})." \ + "" \ + "init(_, _, _) ->" \ + " {upgrade, protocol, cowboy_websocket}." \ + "" \ + "websocket_init(_, Req, _Opts) ->" \ + " Req2 = cowboy_req:compact(Req)," \ + " {ok, Req2, \#state{}}." 
\ + "" \ + "websocket_handle({text, Data}, Req, State) ->" \ + " {reply, {text, Data}, Req, State};" \ + "websocket_handle({binary, Data}, Req, State) ->" \ + " {reply, {binary, Data}, Req, State};" \ + "websocket_handle(_Frame, Req, State) ->" \ + " {ok, Req, State}." \ + "" \ + "websocket_info(_Info, Req, State) ->" \ + " {ok, Req, State}." \ + "" \ + "websocket_terminate(_Reason, _Req, _State) ->" \ + " ok." +tpl_ranch_protocol = "-module($(n))." \ + "-behaviour(ranch_protocol)." \ + "" \ + "-export([start_link/4])." \ + "-export([init/4])." \ + "" \ + "-type opts() :: []." \ + "-export_type([opts/0])." \ + "" \ + "-record(state, {" \ + " socket :: inet:socket()," \ + " transport :: module()" \ + "})." \ + "" \ + "start_link(Ref, Socket, Transport, Opts) ->" \ + " Pid = spawn_link(?MODULE, init, [Ref, Socket, Transport, Opts])," \ + " {ok, Pid}." \ + "" \ + "-spec init(ranch:ref(), inet:socket(), module(), opts()) -> ok." \ + "init(Ref, Socket, Transport, _Opts) ->" \ + " ok = ranch:accept_ack(Ref)," \ + " loop(\#state{socket=Socket, transport=Transport})." \ + "" \ + "loop(State) ->" \ + " loop(State)." + +# Plugin-specific targets. 
+ +bootstrap: +ifneq ($(wildcard src/),) + $(error Error: src/ directory already exists) +endif + @printf "%s\n" $(bs_Makefile) > Makefile + @mkdir src/ + @printf "%s\n" $(bs_appsrc) > src/$(PROJECT).app.src + @printf "%s\n" $(bs_app) > src/$(PROJECT)_app.erl + $(eval n := $(PROJECT)_sup) + @printf "%s\n" $(tpl_supervisor) > src/$(PROJECT)_sup.erl + +bootstrap-lib: +ifneq ($(wildcard src/),) + $(error Error: src/ directory already exists) +endif + @printf "%s\n" $(bs_Makefile) > Makefile + @mkdir src/ + @printf "%s\n" $(bs_appsrc_lib) > src/$(PROJECT).app.src + +bootstrap-rel: +ifneq ($(wildcard relx.config),) + $(error Error: relx.config already exists) +endif +ifneq ($(wildcard rel/),) + $(error Error: rel/ directory already exists) +endif + @printf "%s\n" $(bs_relx_config) > relx.config + @mkdir rel/ + @printf "%s\n" $(bs_sys_config) > rel/sys.config + @printf "%s\n" $(bs_vm_args) > rel/vm.args + +new: +ifeq ($(wildcard src/),) + $(error Error: src/ directory does not exist) +endif +ifndef t + $(error Usage: make new t=TEMPLATE n=NAME) +endif +ifndef tpl_$(t) + $(error Unknown template) +endif +ifndef n + $(error Usage: make new t=TEMPLATE n=NAME) +endif + @printf "%s\n" $(tpl_$(t)) > src/$(n).erl + +list-templates: + @echo Available templates: $(sort $(patsubst tpl_%,%,$(filter tpl_%,$(.VARIABLES)))) + +# Copyright (c) 2014-2015, Loïc Hoguin <[email protected]> +# This file is part of erlang.mk and subject to the terms of the ISC License. + +.PHONY: clean-c_src distclean-c_src-env +# todo + +# Configuration. + +C_SRC_DIR = $(CURDIR)/c_src +C_SRC_ENV ?= $(C_SRC_DIR)/env.mk +C_SRC_OUTPUT ?= $(CURDIR)/priv/$(PROJECT).so + +# System type and C compiler/flags. 
+ +UNAME_SYS := $(shell uname -s) +ifeq ($(UNAME_SYS), Darwin) + CC ?= cc + CFLAGS ?= -O3 -std=c99 -arch x86_64 -finline-functions -Wall -Wmissing-prototypes + CXXFLAGS ?= -O3 -arch x86_64 -finline-functions -Wall + LDFLAGS ?= -arch x86_64 -flat_namespace -undefined suppress +else ifeq ($(UNAME_SYS), FreeBSD) + CC ?= cc + CFLAGS ?= -O3 -std=c99 -finline-functions -Wall -Wmissing-prototypes + CXXFLAGS ?= -O3 -finline-functions -Wall +else ifeq ($(UNAME_SYS), Linux) + CC ?= gcc + CFLAGS ?= -O3 -std=c99 -finline-functions -Wall -Wmissing-prototypes + CXXFLAGS ?= -O3 -finline-functions -Wall +endif + +CFLAGS += -fPIC -I $(ERTS_INCLUDE_DIR) -I $(ERL_INTERFACE_INCLUDE_DIR) +CXXFLAGS += -fPIC -I $(ERTS_INCLUDE_DIR) -I $(ERL_INTERFACE_INCLUDE_DIR) + +LDLIBS += -L $(ERL_INTERFACE_LIB_DIR) -lerl_interface -lei +LDFLAGS += -shared + +# Verbosity. + +c_verbose_0 = @echo " C " $(?F); +c_verbose = $(c_verbose_$(V)) + +cpp_verbose_0 = @echo " CPP " $(?F); +cpp_verbose = $(cpp_verbose_$(V)) + +link_verbose_0 = @echo " LD " $(@F); +link_verbose = $(link_verbose_$(V)) + +# Targets. 
+ +ifeq ($(wildcard $(C_SRC_DIR)),) +else ifneq ($(wildcard $(C_SRC_DIR)/Makefile),) +app:: + $(MAKE) -C $(C_SRC_DIR) + +clean:: + $(MAKE) -C $(C_SRC_DIR) clean + +else +SOURCES := $(shell find $(C_SRC_DIR) -type f \( -name "*.c" -o -name "*.C" -o -name "*.cc" -o -name "*.cpp" \)) +OBJECTS = $(addsuffix .o, $(basename $(SOURCES))) + +COMPILE_C = $(c_verbose) $(CC) $(CFLAGS) $(CPPFLAGS) -c +COMPILE_CPP = $(cpp_verbose) $(CXX) $(CXXFLAGS) $(CPPFLAGS) -c + +app:: $(C_SRC_ENV) $(C_SRC_OUTPUT) + +$(C_SRC_OUTPUT): $(OBJECTS) + @mkdir -p priv/ + $(link_verbose) $(CC) $(OBJECTS) $(LDFLAGS) $(LDLIBS) -o $(C_SRC_OUTPUT) + +%.o: %.c + $(COMPILE_C) $(OUTPUT_OPTION) $< + +%.o: %.cc + $(COMPILE_CPP) $(OUTPUT_OPTION) $< + +%.o: %.C + $(COMPILE_CPP) $(OUTPUT_OPTION) $< + +%.o: %.cpp + $(COMPILE_CPP) $(OUTPUT_OPTION) $< + +$(C_SRC_ENV): + @$(ERL) -eval "file:write_file(\"$(C_SRC_ENV)\", \ + io_lib:format( \ + \"ERTS_INCLUDE_DIR ?= ~s/erts-~s/include/~n\" \ + \"ERL_INTERFACE_INCLUDE_DIR ?= ~s~n\" \ + \"ERL_INTERFACE_LIB_DIR ?= ~s~n\", \ + [code:root_dir(), erlang:system_info(version), \ + code:lib_dir(erl_interface, include), \ + code:lib_dir(erl_interface, lib)])), \ + halt()." + +clean:: clean-c_src + +clean-c_src: + $(gen_verbose) rm -f $(C_SRC_OUTPUT) $(OBJECTS) + +distclean:: distclean-c_src-env + +distclean-c_src-env: + $(gen_verbose) rm -f $(C_SRC_ENV) + +-include $(C_SRC_ENV) +endif + +# Copyright (c) 2013-2015, Loïc Hoguin <[email protected]> +# This file is part of erlang.mk and subject to the terms of the ISC License. + +.PHONY: ct distclean-ct + +# Configuration. + +CT_OPTS ?= +ifneq ($(wildcard $(TEST_DIR)),) + CT_SUITES ?= $(sort $(subst _SUITE.erl,,$(shell find $(TEST_DIR) -type f -name \*_SUITE.erl -exec basename {} \;))) +else + CT_SUITES ?= +endif + +# Core targets. 
+ +tests:: ct + +distclean:: distclean-ct + +help:: + @printf "%s\n" "" \ + "Common_test targets:" \ + " ct Run all the common_test suites for this project" \ + "" \ + "All your common_test suites have their associated targets." \ + "A suite named http_SUITE can be ran using the ct-http target." + +# Plugin-specific targets. CT_RUN = ct_run \ -no_auto_compile \ - -noshell \ - -pa $(realpath ebin) $(DEPS_DIR)/*/ebin \ - -dir test \ + -noinput \ + -pa ebin $(DEPS_DIR)/*/ebin \ + -dir $(TEST_DIR) \ -logdir logs -# -cover test/cover.spec -CT_SUITES ?= +ifeq ($(CT_SUITES),) +ct: +else +ct: test-build + @mkdir -p logs/ + $(gen_verbose) $(CT_RUN) -suite $(addsuffix _SUITE,$(CT_SUITES)) $(CT_OPTS) +endif -define test_target -test_$(1): ERLC_OPTS += -DTEST=1 +'{parse_transform, eunit_autoexport}' -test_$(1): clean deps app build-tests - @if [ -d "test" ] ; \ - then \ - mkdir -p logs/ ; \ - $(CT_RUN) -suite $(addsuffix _SUITE,$(1)) ; \ - fi - $(gen_verbose) rm -f test/*.beam +define ct_suite_target +ct-$(1): test-build + @mkdir -p logs/ + $(gen_verbose) $(CT_RUN) -suite $(addsuffix _SUITE,$(1)) $(CT_OPTS) endef -$(foreach test,$(CT_SUITES),$(eval $(call test_target,$(test)))) +$(foreach test,$(CT_SUITES),$(eval $(call ct_suite_target,$(test)))) -tests: ERLC_OPTS += -DTEST=1 +'{parse_transform, eunit_autoexport}' -tests: clean deps app build-tests - @if [ -d "test" ] ; \ - then \ - mkdir -p logs/ ; \ - $(CT_RUN) -suite $(addsuffix _SUITE,$(CT_SUITES)) ; \ - fi - $(gen_verbose) rm -f test/*.beam +distclean-ct: + $(gen_verbose) rm -rf logs/ + +# Copyright (c) 2013-2015, Loïc Hoguin <[email protected]> +# This file is part of erlang.mk and subject to the terms of the ISC License. + +.PHONY: plt distclean-plt dialyze -# Dialyzer. +# Configuration. 
+ +DIALYZER_PLT ?= $(CURDIR)/.$(PROJECT).plt +export DIALYZER_PLT PLT_APPS ?= +DIALYZER_DIRS ?= --src -r src DIALYZER_OPTS ?= -Werror_handling -Wrace_conditions \ -Wunmatched_returns # -Wunderspecs -build-plt: deps app - @dialyzer --build_plt --output_plt .$(PROJECT).plt \ - --apps erts kernel stdlib $(PLT_APPS) $(ALL_DEPS_DIRS) +# Core targets. + +distclean:: distclean-plt +help:: + @printf "%s\n" "" \ + "Dialyzer targets:" \ + " plt Build a PLT file for this project" \ + " dialyze Analyze the project using Dialyzer" + +# Plugin-specific targets. + +$(DIALYZER_PLT): deps app + @dialyzer --build_plt --apps erts kernel stdlib $(PLT_APPS) $(ALL_DEPS_DIRS) + +plt: $(DIALYZER_PLT) + +distclean-plt: + $(gen_verbose) rm -f $(DIALYZER_PLT) + +ifneq ($(wildcard $(DIALYZER_PLT)),) dialyze: - @dialyzer --src src --plt .$(PROJECT).plt --no_native $(DIALYZER_OPTS) +else +dialyze: $(DIALYZER_PLT) +endif + @dialyzer --no_native $(DIALYZER_DIRS) $(DIALYZER_OPTS) -# Packages. +# Copyright (c) 2013-2015, Loïc Hoguin <[email protected]> +# Copyright (c) 2015, Viktor Söderqvist <[email protected]> +# This file is part of erlang.mk and subject to the terms of the ISC License. -$(PKG_FILE): - @$(call get_pkg_file) +.PHONY: distclean-edoc build-doc-deps -pkg-list: $(PKG_FILE) - @cat $(PKG_FILE) | awk 'BEGIN { FS = "\t" }; { print \ - "Name:\t\t" $$1 "\n" \ - "Repository:\t" $$2 "\n" \ - "Website:\t" $$3 "\n" \ - "Description:\t" $$4 "\n" }' +# Configuration. -ifdef q -pkg-search: $(PKG_FILE) - @cat $(PKG_FILE) | grep -i ${q} | awk 'BEGIN { FS = "\t" }; { print \ - "Name:\t\t" $$1 "\n" \ - "Repository:\t" $$2 "\n" \ - "Website:\t" $$3 "\n" \ - "Description:\t" $$4 "\n" }' +EDOC_OPTS ?= + +# Core targets. + +docs:: distclean-edoc build-doc-deps + $(gen_verbose) $(ERL) -eval 'edoc:application($(PROJECT), ".", [$(EDOC_OPTS)]), halt().' + +distclean:: distclean-edoc + +# Plugin-specific targets. 
+ +DOC_DEPS_DIRS = $(addprefix $(DEPS_DIR)/,$(DOC_DEPS)) + +$(foreach dep,$(DOC_DEPS),$(eval $(call dep_target,$(dep)))) + +build-doc-deps: $(DOC_DEPS_DIRS) + @for dep in $(DOC_DEPS_DIRS) ; do $(MAKE) -C $$dep; done + +distclean-edoc: + $(gen_verbose) rm -f doc/*.css doc/*.html doc/*.png doc/edoc-info + +# Copyright (c) 2014, Juan Facorro <[email protected]> +# This file is part of erlang.mk and subject to the terms of the ISC License. + +.PHONY: elvis distclean-elvis + +# Configuration. + +ELVIS_CONFIG ?= $(CURDIR)/elvis.config + +ELVIS ?= $(CURDIR)/elvis +export ELVIS + +ELVIS_URL ?= https://github.com/inaka/elvis/releases/download/0.2.3/elvis +ELVIS_CONFIG_URL ?= https://github.com/inaka/elvis/releases/download/0.2.3/elvis.config +ELVIS_OPTS ?= + +# Core targets. + +help:: + @printf "%s\n" "" \ + "Elvis targets:" \ + " elvis Run Elvis using the local elvis.config or download the default otherwise" + +distclean:: distclean-elvis + +# Plugin-specific targets. + +$(ELVIS): + @$(call core_http_get,$(ELVIS),$(ELVIS_URL)) + @chmod +x $(ELVIS) + +$(ELVIS_CONFIG): + @$(call core_http_get,$(ELVIS_CONFIG),$(ELVIS_CONFIG_URL)) + +elvis: $(ELVIS) $(ELVIS_CONFIG) + @$(ELVIS) rock -c $(ELVIS_CONFIG) $(ELVIS_OPTS) + +distclean-elvis: + $(gen_verbose) rm -rf $(ELVIS) + +# Copyright (c) 2013-2015, Loïc Hoguin <[email protected]> +# This file is part of erlang.mk and subject to the terms of the ISC License. + +# Configuration. + +DTL_FULL_PATH ?= 0 + +# Verbosity. + +dtl_verbose_0 = @echo " DTL " $(filter %.dtl,$(?F)); +dtl_verbose = $(dtl_verbose_$(V)) + +# Core targets. 
+ +define compile_erlydtl + $(dtl_verbose) $(ERL) -pa ebin/ $(DEPS_DIR)/erlydtl/ebin/ -eval ' \ + Compile = fun(F) -> \ + S = fun (1) -> re:replace(filename:rootname(string:sub_string(F, 11), ".dtl"), "/", "_", [{return, list}, global]); \ + (0) -> filename:basename(F, ".dtl") \ + end, \ + Module = list_to_atom(string:to_lower(S($(DTL_FULL_PATH))) ++ "_dtl"), \ + {ok, _} = erlydtl:compile(F, Module, [{out_dir, "ebin/"}, return_errors, {doc_root, "templates"}]) \ + end, \ + _ = [Compile(F) || F <- string:tokens("$(1)", " ")], \ + halt().' +endef + +ifneq ($(wildcard src/),) +ebin/$(PROJECT).app:: $(shell find templates -type f -name \*.dtl 2>/dev/null) + $(if $(strip $?),$(call compile_erlydtl,$?)) +endif + +# Copyright (c) 2014 Dave Cottlehuber <[email protected]> +# This file is part of erlang.mk and subject to the terms of the ISC License. + +.PHONY: distclean-escript escript + +# Configuration. + +ESCRIPT_NAME ?= $(PROJECT) +ESCRIPT_COMMENT ?= This is an -*- erlang -*- file + +ESCRIPT_BEAMS ?= "ebin/*", "deps/*/ebin/*" +ESCRIPT_SYS_CONFIG ?= "rel/sys.config" +ESCRIPT_EMU_ARGS ?= -pa . \ + -sasl errlog_type error \ + -escript main $(ESCRIPT_NAME) +ESCRIPT_SHEBANG ?= /usr/bin/env escript +ESCRIPT_STATIC ?= "deps/*/priv/**", "priv/**" + +# Core targets. + +distclean:: distclean-escript + +help:: + @printf "%s\n" "" \ + "Escript targets:" \ + " escript Build an executable escript archive" \ + +# Plugin-specific targets. + +# Based on https://github.com/synrc/mad/blob/master/src/mad_bundle.erl +# Copyright (c) 2013 Maxim Sokhatsky, Synrc Research Center +# Modified MIT License, https://github.com/synrc/mad/blob/master/LICENSE : +# Software may only be used for the great good and the true happiness of all +# sentient beings. 
+ +define ESCRIPT_RAW +'Read = fun(F) -> {ok, B} = file:read_file(filename:absname(F)), B end,'\ +'Files = fun(L) -> A = lists:concat([filelib:wildcard(X)||X<- L ]),'\ +' [F || F <- A, not filelib:is_dir(F) ] end,'\ +'Squash = fun(L) -> [{filename:basename(F), Read(F) } || F <- L ] end,'\ +'Zip = fun(A, L) -> {ok,{_,Z}} = zip:create(A, L, [{compress,all},memory]), Z end,'\ +'Ez = fun(Escript) ->'\ +' Static = Files([$(ESCRIPT_STATIC)]),'\ +' Beams = Squash(Files([$(ESCRIPT_BEAMS), $(ESCRIPT_SYS_CONFIG)])),'\ +' Archive = Beams ++ [{ "static.gz", Zip("static.gz", Static)}],'\ +' escript:create(Escript, [ $(ESCRIPT_OPTIONS)'\ +' {archive, Archive, [memory]},'\ +' {shebang, "$(ESCRIPT_SHEBANG)"},'\ +' {comment, "$(ESCRIPT_COMMENT)"},'\ +' {emu_args, " $(ESCRIPT_EMU_ARGS)"}'\ +' ]),'\ +' file:change_mode(Escript, 8#755)'\ +'end,'\ +'Ez("$(ESCRIPT_NAME)"),'\ +'halt().' +endef + +ESCRIPT_COMMAND = $(subst ' ',,$(ESCRIPT_RAW)) + +escript:: distclean-escript deps app + $(gen_verbose) $(ERL) -eval $(ESCRIPT_COMMAND) + +distclean-escript: + $(gen_verbose) rm -f $(ESCRIPT_NAME) + +# Copyright (c) 2014, Enrique Fernandez <[email protected]> +# Copyright (c) 2015, Loïc Hoguin <[email protected]> +# This file is contributed to erlang.mk and subject to the terms of the ISC License. + +.PHONY: eunit + +# Configuration + +ifeq ($(strip $(TEST_DIR)),) +TAGGED_EUNIT_TESTS = {dir,"ebin"} else -pkg-search: - @echo "Usage: make pkg-search q=STRING" +ifeq ($(wildcard $(TEST_DIR)),) +TAGGED_EUNIT_TESTS = {dir,"ebin"} +else +# All modules in TEST_DIR +TEST_DIR_MODS = $(notdir $(basename $(shell find $(TEST_DIR) -type f -name *.beam))) +# All modules in 'ebin' +EUNIT_EBIN_MODS = $(notdir $(basename $(shell find ebin -type f -name *.beam))) +# Only those modules in TEST_DIR with no matching module in 'ebin'. +# This is done to avoid some tests being executed twice. 
+EUNIT_MODS = $(filter-out $(patsubst %,%_tests,$(EUNIT_EBIN_MODS)),$(TEST_DIR_MODS)) +TAGGED_EUNIT_TESTS = {dir,"ebin"} $(foreach mod,$(EUNIT_MODS),$(shell echo $(mod) | sed -e 's/\(.*\)/{module,\1}/g')) +endif +endif + +EUNIT_OPTS ?= verbose + +# Utility functions + +define str-join + $(shell echo '$(strip $(1))' | sed -e "s/ /,/g") +endef + +# Core targets. + +tests:: eunit + +help:: + @printf "%s\n" "" \ + "EUnit targets:" \ + " eunit Run all the EUnit tests for this project" + +# Plugin-specific targets. + +EUNIT_RUN = $(ERL) \ + -pa $(TEST_DIR) $(DEPS_DIR)/*/ebin \ + -pz ebin \ + -eval 'case eunit:test([$(call str-join,$(TAGGED_EUNIT_TESTS))], [$(EUNIT_OPTS)]) of ok -> halt(0); error -> halt(1) end.' + +eunit: test-build + $(gen_verbose) $(EUNIT_RUN) + +# Copyright (c) 2013-2015, Loïc Hoguin <[email protected]> +# This file is part of erlang.mk and subject to the terms of the ISC License. + +.PHONY: relx-rel distclean-relx-rel distclean-relx + +# Configuration. + +RELX_CONFIG ?= $(CURDIR)/relx.config + +RELX ?= $(CURDIR)/relx +export RELX + +RELX_URL ?= https://github.com/erlware/relx/releases/download/v1.2.0/relx +RELX_OPTS ?= +RELX_OUTPUT_DIR ?= _rel + +ifeq ($(firstword $(RELX_OPTS)),-o) + RELX_OUTPUT_DIR = $(word 2,$(RELX_OPTS)) +else + RELX_OPTS += -o $(RELX_OUTPUT_DIR) +endif + +# Core targets. + +ifneq ($(wildcard $(RELX_CONFIG)),) +rel:: distclean-relx-rel relx-rel +endif + +distclean:: distclean-relx-rel distclean-relx + +# Plugin-specific targets. + +define relx_fetch + $(call core_http_get,$(RELX),$(RELX_URL)) + chmod +x $(RELX) +endef + +$(RELX): + @$(call relx_fetch) + +relx-rel: $(RELX) + @$(RELX) -c $(RELX_CONFIG) $(RELX_OPTS) + +distclean-relx-rel: + $(gen_verbose) rm -rf $(RELX_OUTPUT_DIR) + +distclean-relx: + $(gen_verbose) rm -rf $(RELX) + +# Copyright (c) 2014, M Robert Martin <[email protected]> +# This file is contributed to erlang.mk and subject to the terms of the ISC License. + +.PHONY: shell + +# Configuration. 
+ +SHELL_PATH ?= -pa $(CURDIR)/ebin $(DEPS_DIR)/*/ebin +SHELL_OPTS ?= + +ALL_SHELL_DEPS_DIRS = $(addprefix $(DEPS_DIR)/,$(SHELL_DEPS)) + +# Core targets + +help:: + @printf "%s\n" "" \ + "Shell targets:" \ + " shell Run an erlang shell with SHELL_OPTS or reasonable default" + +# Plugin-specific targets. + +$(foreach dep,$(SHELL_DEPS),$(eval $(call dep_target,$(dep)))) + +build-shell-deps: $(ALL_SHELL_DEPS_DIRS) + @for dep in $(ALL_SHELL_DEPS_DIRS) ; do $(MAKE) -C $$dep ; done + +shell: build-shell-deps + $(gen_verbose) erl $(SHELL_PATH) $(SHELL_OPTS) + +# Copyright (c) 2015, Loïc Hoguin <[email protected]> +# This file is part of erlang.mk and subject to the terms of the ISC License. + +ifneq ($(wildcard $(DEPS_DIR)/triq),) +.PHONY: triq + +# Targets. + +tests:: triq + +define triq_run +$(ERL) -pa $(CURDIR)/ebin $(DEPS_DIR)/*/ebin \ + -eval "try $(1) of true -> halt(0); _ -> halt(1) catch error:undef -> io:format(\"Undefined property or module~n\"), halt() end." +endef + +ifdef t +ifeq (,$(findstring :,$(t))) +triq: test-build + @$(call triq_run,triq:check($(t))) +else +triq: test-build + @echo Testing $(t)/0 + @$(call triq_run,triq:check($(t)())) +endif +else +triq: test-build + $(eval MODULES := $(shell find ebin -type f -name \*.beam \ + | sed "s/ebin\//'/;s/\.beam/',/" | sed '$$s/.$$//')) + $(gen_verbose) $(call triq_run,[true] =:= lists:usort([triq:check(M) || M <- [$(MODULES)]])) +endif endif diff --git a/include/cow_inline.hrl b/include/cow_inline.hrl new file mode 100644 index 0000000..5c43a5a --- /dev/null +++ b/include/cow_inline.hrl @@ -0,0 +1,391 @@ +%% Copyright (c) 2014-2015, Loïc Hoguin <[email protected]> +%% +%% Permission to use, copy, modify, and/or distribute this software for any +%% purpose with or without fee is hereby granted, provided that the above +%% copyright notice and this permission notice appear in all copies. 
+%% +%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +-ifndef(COW_INLINE_HRL). +-define(COW_INLINE_HRL, 1). + +%% LC(Character) + +-define(LC(C), case C of + $A -> $a; + $B -> $b; + $C -> $c; + $D -> $d; + $E -> $e; + $F -> $f; + $G -> $g; + $H -> $h; + $I -> $i; + $J -> $j; + $K -> $k; + $L -> $l; + $M -> $m; + $N -> $n; + $O -> $o; + $P -> $p; + $Q -> $q; + $R -> $r; + $S -> $s; + $T -> $t; + $U -> $u; + $V -> $v; + $W -> $w; + $X -> $x; + $Y -> $y; + $Z -> $z; + _ -> C +end). + +%% LOWER(Bin) +%% +%% Lowercase the entire binary string in a binary comprehension. + +-define(LOWER(Bin), << << ?LC(C) >> || << C >> <= Bin >>). + +%% LOWERCASE(Function, Rest, Acc, ...) +%% +%% To be included at the end of a case block. +%% Defined for up to 10 extra arguments. 
+ +-define(LOWER(Function, Rest, Acc), case C of + $A -> Function(Rest, << Acc/binary, $a >>); + $B -> Function(Rest, << Acc/binary, $b >>); + $C -> Function(Rest, << Acc/binary, $c >>); + $D -> Function(Rest, << Acc/binary, $d >>); + $E -> Function(Rest, << Acc/binary, $e >>); + $F -> Function(Rest, << Acc/binary, $f >>); + $G -> Function(Rest, << Acc/binary, $g >>); + $H -> Function(Rest, << Acc/binary, $h >>); + $I -> Function(Rest, << Acc/binary, $i >>); + $J -> Function(Rest, << Acc/binary, $j >>); + $K -> Function(Rest, << Acc/binary, $k >>); + $L -> Function(Rest, << Acc/binary, $l >>); + $M -> Function(Rest, << Acc/binary, $m >>); + $N -> Function(Rest, << Acc/binary, $n >>); + $O -> Function(Rest, << Acc/binary, $o >>); + $P -> Function(Rest, << Acc/binary, $p >>); + $Q -> Function(Rest, << Acc/binary, $q >>); + $R -> Function(Rest, << Acc/binary, $r >>); + $S -> Function(Rest, << Acc/binary, $s >>); + $T -> Function(Rest, << Acc/binary, $t >>); + $U -> Function(Rest, << Acc/binary, $u >>); + $V -> Function(Rest, << Acc/binary, $v >>); + $W -> Function(Rest, << Acc/binary, $w >>); + $X -> Function(Rest, << Acc/binary, $x >>); + $Y -> Function(Rest, << Acc/binary, $y >>); + $Z -> Function(Rest, << Acc/binary, $z >>); + C -> Function(Rest, << Acc/binary, C >>) +end). 
+ +-define(LOWER(Function, Rest, A0, Acc), case C of + $A -> Function(Rest, A0, << Acc/binary, $a >>); + $B -> Function(Rest, A0, << Acc/binary, $b >>); + $C -> Function(Rest, A0, << Acc/binary, $c >>); + $D -> Function(Rest, A0, << Acc/binary, $d >>); + $E -> Function(Rest, A0, << Acc/binary, $e >>); + $F -> Function(Rest, A0, << Acc/binary, $f >>); + $G -> Function(Rest, A0, << Acc/binary, $g >>); + $H -> Function(Rest, A0, << Acc/binary, $h >>); + $I -> Function(Rest, A0, << Acc/binary, $i >>); + $J -> Function(Rest, A0, << Acc/binary, $j >>); + $K -> Function(Rest, A0, << Acc/binary, $k >>); + $L -> Function(Rest, A0, << Acc/binary, $l >>); + $M -> Function(Rest, A0, << Acc/binary, $m >>); + $N -> Function(Rest, A0, << Acc/binary, $n >>); + $O -> Function(Rest, A0, << Acc/binary, $o >>); + $P -> Function(Rest, A0, << Acc/binary, $p >>); + $Q -> Function(Rest, A0, << Acc/binary, $q >>); + $R -> Function(Rest, A0, << Acc/binary, $r >>); + $S -> Function(Rest, A0, << Acc/binary, $s >>); + $T -> Function(Rest, A0, << Acc/binary, $t >>); + $U -> Function(Rest, A0, << Acc/binary, $u >>); + $V -> Function(Rest, A0, << Acc/binary, $v >>); + $W -> Function(Rest, A0, << Acc/binary, $w >>); + $X -> Function(Rest, A0, << Acc/binary, $x >>); + $Y -> Function(Rest, A0, << Acc/binary, $y >>); + $Z -> Function(Rest, A0, << Acc/binary, $z >>); + C -> Function(Rest, A0, << Acc/binary, C >>) +end). 
+ +-define(LOWER(Function, Rest, A0, A1, Acc), case C of + $A -> Function(Rest, A0, A1, << Acc/binary, $a >>); + $B -> Function(Rest, A0, A1, << Acc/binary, $b >>); + $C -> Function(Rest, A0, A1, << Acc/binary, $c >>); + $D -> Function(Rest, A0, A1, << Acc/binary, $d >>); + $E -> Function(Rest, A0, A1, << Acc/binary, $e >>); + $F -> Function(Rest, A0, A1, << Acc/binary, $f >>); + $G -> Function(Rest, A0, A1, << Acc/binary, $g >>); + $H -> Function(Rest, A0, A1, << Acc/binary, $h >>); + $I -> Function(Rest, A0, A1, << Acc/binary, $i >>); + $J -> Function(Rest, A0, A1, << Acc/binary, $j >>); + $K -> Function(Rest, A0, A1, << Acc/binary, $k >>); + $L -> Function(Rest, A0, A1, << Acc/binary, $l >>); + $M -> Function(Rest, A0, A1, << Acc/binary, $m >>); + $N -> Function(Rest, A0, A1, << Acc/binary, $n >>); + $O -> Function(Rest, A0, A1, << Acc/binary, $o >>); + $P -> Function(Rest, A0, A1, << Acc/binary, $p >>); + $Q -> Function(Rest, A0, A1, << Acc/binary, $q >>); + $R -> Function(Rest, A0, A1, << Acc/binary, $r >>); + $S -> Function(Rest, A0, A1, << Acc/binary, $s >>); + $T -> Function(Rest, A0, A1, << Acc/binary, $t >>); + $U -> Function(Rest, A0, A1, << Acc/binary, $u >>); + $V -> Function(Rest, A0, A1, << Acc/binary, $v >>); + $W -> Function(Rest, A0, A1, << Acc/binary, $w >>); + $X -> Function(Rest, A0, A1, << Acc/binary, $x >>); + $Y -> Function(Rest, A0, A1, << Acc/binary, $y >>); + $Z -> Function(Rest, A0, A1, << Acc/binary, $z >>); + C -> Function(Rest, A0, A1, << Acc/binary, C >>) +end). 
+ +-define(LOWER(Function, Rest, A0, A1, A2, Acc), case C of + $A -> Function(Rest, A0, A1, A2, << Acc/binary, $a >>); + $B -> Function(Rest, A0, A1, A2, << Acc/binary, $b >>); + $C -> Function(Rest, A0, A1, A2, << Acc/binary, $c >>); + $D -> Function(Rest, A0, A1, A2, << Acc/binary, $d >>); + $E -> Function(Rest, A0, A1, A2, << Acc/binary, $e >>); + $F -> Function(Rest, A0, A1, A2, << Acc/binary, $f >>); + $G -> Function(Rest, A0, A1, A2, << Acc/binary, $g >>); + $H -> Function(Rest, A0, A1, A2, << Acc/binary, $h >>); + $I -> Function(Rest, A0, A1, A2, << Acc/binary, $i >>); + $J -> Function(Rest, A0, A1, A2, << Acc/binary, $j >>); + $K -> Function(Rest, A0, A1, A2, << Acc/binary, $k >>); + $L -> Function(Rest, A0, A1, A2, << Acc/binary, $l >>); + $M -> Function(Rest, A0, A1, A2, << Acc/binary, $m >>); + $N -> Function(Rest, A0, A1, A2, << Acc/binary, $n >>); + $O -> Function(Rest, A0, A1, A2, << Acc/binary, $o >>); + $P -> Function(Rest, A0, A1, A2, << Acc/binary, $p >>); + $Q -> Function(Rest, A0, A1, A2, << Acc/binary, $q >>); + $R -> Function(Rest, A0, A1, A2, << Acc/binary, $r >>); + $S -> Function(Rest, A0, A1, A2, << Acc/binary, $s >>); + $T -> Function(Rest, A0, A1, A2, << Acc/binary, $t >>); + $U -> Function(Rest, A0, A1, A2, << Acc/binary, $u >>); + $V -> Function(Rest, A0, A1, A2, << Acc/binary, $v >>); + $W -> Function(Rest, A0, A1, A2, << Acc/binary, $w >>); + $X -> Function(Rest, A0, A1, A2, << Acc/binary, $x >>); + $Y -> Function(Rest, A0, A1, A2, << Acc/binary, $y >>); + $Z -> Function(Rest, A0, A1, A2, << Acc/binary, $z >>); + C -> Function(Rest, A0, A1, A2, << Acc/binary, C >>) +end). 
+ +-define(LOWER(Function, Rest, A0, A1, A2, A3, Acc), case C of + $A -> Function(Rest, A0, A1, A2, A3, << Acc/binary, $a >>); + $B -> Function(Rest, A0, A1, A2, A3, << Acc/binary, $b >>); + $C -> Function(Rest, A0, A1, A2, A3, << Acc/binary, $c >>); + $D -> Function(Rest, A0, A1, A2, A3, << Acc/binary, $d >>); + $E -> Function(Rest, A0, A1, A2, A3, << Acc/binary, $e >>); + $F -> Function(Rest, A0, A1, A2, A3, << Acc/binary, $f >>); + $G -> Function(Rest, A0, A1, A2, A3, << Acc/binary, $g >>); + $H -> Function(Rest, A0, A1, A2, A3, << Acc/binary, $h >>); + $I -> Function(Rest, A0, A1, A2, A3, << Acc/binary, $i >>); + $J -> Function(Rest, A0, A1, A2, A3, << Acc/binary, $j >>); + $K -> Function(Rest, A0, A1, A2, A3, << Acc/binary, $k >>); + $L -> Function(Rest, A0, A1, A2, A3, << Acc/binary, $l >>); + $M -> Function(Rest, A0, A1, A2, A3, << Acc/binary, $m >>); + $N -> Function(Rest, A0, A1, A2, A3, << Acc/binary, $n >>); + $O -> Function(Rest, A0, A1, A2, A3, << Acc/binary, $o >>); + $P -> Function(Rest, A0, A1, A2, A3, << Acc/binary, $p >>); + $Q -> Function(Rest, A0, A1, A2, A3, << Acc/binary, $q >>); + $R -> Function(Rest, A0, A1, A2, A3, << Acc/binary, $r >>); + $S -> Function(Rest, A0, A1, A2, A3, << Acc/binary, $s >>); + $T -> Function(Rest, A0, A1, A2, A3, << Acc/binary, $t >>); + $U -> Function(Rest, A0, A1, A2, A3, << Acc/binary, $u >>); + $V -> Function(Rest, A0, A1, A2, A3, << Acc/binary, $v >>); + $W -> Function(Rest, A0, A1, A2, A3, << Acc/binary, $w >>); + $X -> Function(Rest, A0, A1, A2, A3, << Acc/binary, $x >>); + $Y -> Function(Rest, A0, A1, A2, A3, << Acc/binary, $y >>); + $Z -> Function(Rest, A0, A1, A2, A3, << Acc/binary, $z >>); + C -> Function(Rest, A0, A1, A2, A3, << Acc/binary, C >>) +end). 
+ +-define(LOWER(Function, Rest, A0, A1, A2, A3, A4, Acc), case C of + $A -> Function(Rest, A0, A1, A2, A3, A4, << Acc/binary, $a >>); + $B -> Function(Rest, A0, A1, A2, A3, A4, << Acc/binary, $b >>); + $C -> Function(Rest, A0, A1, A2, A3, A4, << Acc/binary, $c >>); + $D -> Function(Rest, A0, A1, A2, A3, A4, << Acc/binary, $d >>); + $E -> Function(Rest, A0, A1, A2, A3, A4, << Acc/binary, $e >>); + $F -> Function(Rest, A0, A1, A2, A3, A4, << Acc/binary, $f >>); + $G -> Function(Rest, A0, A1, A2, A3, A4, << Acc/binary, $g >>); + $H -> Function(Rest, A0, A1, A2, A3, A4, << Acc/binary, $h >>); + $I -> Function(Rest, A0, A1, A2, A3, A4, << Acc/binary, $i >>); + $J -> Function(Rest, A0, A1, A2, A3, A4, << Acc/binary, $j >>); + $K -> Function(Rest, A0, A1, A2, A3, A4, << Acc/binary, $k >>); + $L -> Function(Rest, A0, A1, A2, A3, A4, << Acc/binary, $l >>); + $M -> Function(Rest, A0, A1, A2, A3, A4, << Acc/binary, $m >>); + $N -> Function(Rest, A0, A1, A2, A3, A4, << Acc/binary, $n >>); + $O -> Function(Rest, A0, A1, A2, A3, A4, << Acc/binary, $o >>); + $P -> Function(Rest, A0, A1, A2, A3, A4, << Acc/binary, $p >>); + $Q -> Function(Rest, A0, A1, A2, A3, A4, << Acc/binary, $q >>); + $R -> Function(Rest, A0, A1, A2, A3, A4, << Acc/binary, $r >>); + $S -> Function(Rest, A0, A1, A2, A3, A4, << Acc/binary, $s >>); + $T -> Function(Rest, A0, A1, A2, A3, A4, << Acc/binary, $t >>); + $U -> Function(Rest, A0, A1, A2, A3, A4, << Acc/binary, $u >>); + $V -> Function(Rest, A0, A1, A2, A3, A4, << Acc/binary, $v >>); + $W -> Function(Rest, A0, A1, A2, A3, A4, << Acc/binary, $w >>); + $X -> Function(Rest, A0, A1, A2, A3, A4, << Acc/binary, $x >>); + $Y -> Function(Rest, A0, A1, A2, A3, A4, << Acc/binary, $y >>); + $Z -> Function(Rest, A0, A1, A2, A3, A4, << Acc/binary, $z >>); + C -> Function(Rest, A0, A1, A2, A3, A4, << Acc/binary, C >>) +end). 
+ +-define(LOWER(Function, Rest, A0, A1, A2, A3, A4, A5, Acc), case C of + $A -> Function(Rest, A0, A1, A2, A3, A4, A5, << Acc/binary, $a >>); + $B -> Function(Rest, A0, A1, A2, A3, A4, A5, << Acc/binary, $b >>); + $C -> Function(Rest, A0, A1, A2, A3, A4, A5, << Acc/binary, $c >>); + $D -> Function(Rest, A0, A1, A2, A3, A4, A5, << Acc/binary, $d >>); + $E -> Function(Rest, A0, A1, A2, A3, A4, A5, << Acc/binary, $e >>); + $F -> Function(Rest, A0, A1, A2, A3, A4, A5, << Acc/binary, $f >>); + $G -> Function(Rest, A0, A1, A2, A3, A4, A5, << Acc/binary, $g >>); + $H -> Function(Rest, A0, A1, A2, A3, A4, A5, << Acc/binary, $h >>); + $I -> Function(Rest, A0, A1, A2, A3, A4, A5, << Acc/binary, $i >>); + $J -> Function(Rest, A0, A1, A2, A3, A4, A5, << Acc/binary, $j >>); + $K -> Function(Rest, A0, A1, A2, A3, A4, A5, << Acc/binary, $k >>); + $L -> Function(Rest, A0, A1, A2, A3, A4, A5, << Acc/binary, $l >>); + $M -> Function(Rest, A0, A1, A2, A3, A4, A5, << Acc/binary, $m >>); + $N -> Function(Rest, A0, A1, A2, A3, A4, A5, << Acc/binary, $n >>); + $O -> Function(Rest, A0, A1, A2, A3, A4, A5, << Acc/binary, $o >>); + $P -> Function(Rest, A0, A1, A2, A3, A4, A5, << Acc/binary, $p >>); + $Q -> Function(Rest, A0, A1, A2, A3, A4, A5, << Acc/binary, $q >>); + $R -> Function(Rest, A0, A1, A2, A3, A4, A5, << Acc/binary, $r >>); + $S -> Function(Rest, A0, A1, A2, A3, A4, A5, << Acc/binary, $s >>); + $T -> Function(Rest, A0, A1, A2, A3, A4, A5, << Acc/binary, $t >>); + $U -> Function(Rest, A0, A1, A2, A3, A4, A5, << Acc/binary, $u >>); + $V -> Function(Rest, A0, A1, A2, A3, A4, A5, << Acc/binary, $v >>); + $W -> Function(Rest, A0, A1, A2, A3, A4, A5, << Acc/binary, $w >>); + $X -> Function(Rest, A0, A1, A2, A3, A4, A5, << Acc/binary, $x >>); + $Y -> Function(Rest, A0, A1, A2, A3, A4, A5, << Acc/binary, $y >>); + $Z -> Function(Rest, A0, A1, A2, A3, A4, A5, << Acc/binary, $z >>); + C -> Function(Rest, A0, A1, A2, A3, A4, A5, << Acc/binary, C >>) +end). 
+ +-define(LOWER(Function, Rest, A0, A1, A2, A3, A4, A5, A6, Acc), case C of + $A -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, << Acc/binary, $a >>); + $B -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, << Acc/binary, $b >>); + $C -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, << Acc/binary, $c >>); + $D -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, << Acc/binary, $d >>); + $E -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, << Acc/binary, $e >>); + $F -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, << Acc/binary, $f >>); + $G -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, << Acc/binary, $g >>); + $H -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, << Acc/binary, $h >>); + $I -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, << Acc/binary, $i >>); + $J -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, << Acc/binary, $j >>); + $K -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, << Acc/binary, $k >>); + $L -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, << Acc/binary, $l >>); + $M -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, << Acc/binary, $m >>); + $N -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, << Acc/binary, $n >>); + $O -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, << Acc/binary, $o >>); + $P -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, << Acc/binary, $p >>); + $Q -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, << Acc/binary, $q >>); + $R -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, << Acc/binary, $r >>); + $S -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, << Acc/binary, $s >>); + $T -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, << Acc/binary, $t >>); + $U -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, << Acc/binary, $u >>); + $V -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, << Acc/binary, $v >>); + $W -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, << Acc/binary, $w >>); + $X -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, << Acc/binary, $x >>); + $Y -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, << Acc/binary, $y >>); + $Z -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, << Acc/binary, $z 
>>); + C -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, << Acc/binary, C >>) +end). + +-define(LOWER(Function, Rest, A0, A1, A2, A3, A4, A5, A6, A7, Acc), case C of + $A -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, << Acc/binary, $a >>); + $B -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, << Acc/binary, $b >>); + $C -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, << Acc/binary, $c >>); + $D -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, << Acc/binary, $d >>); + $E -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, << Acc/binary, $e >>); + $F -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, << Acc/binary, $f >>); + $G -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, << Acc/binary, $g >>); + $H -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, << Acc/binary, $h >>); + $I -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, << Acc/binary, $i >>); + $J -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, << Acc/binary, $j >>); + $K -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, << Acc/binary, $k >>); + $L -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, << Acc/binary, $l >>); + $M -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, << Acc/binary, $m >>); + $N -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, << Acc/binary, $n >>); + $O -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, << Acc/binary, $o >>); + $P -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, << Acc/binary, $p >>); + $Q -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, << Acc/binary, $q >>); + $R -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, << Acc/binary, $r >>); + $S -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, << Acc/binary, $s >>); + $T -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, << Acc/binary, $t >>); + $U -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, << Acc/binary, $u >>); + $V -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, << Acc/binary, $v >>); + $W -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, << Acc/binary, $w >>); + $X -> Function(Rest, A0, A1, A2, A3, 
A4, A5, A6, A7, << Acc/binary, $x >>); + $Y -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, << Acc/binary, $y >>); + $Z -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, << Acc/binary, $z >>); + C -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, << Acc/binary, C >>) +end). + +-define(LOWER(Function, Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, Acc), case C of + $A -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, << Acc/binary, $a >>); + $B -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, << Acc/binary, $b >>); + $C -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, << Acc/binary, $c >>); + $D -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, << Acc/binary, $d >>); + $E -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, << Acc/binary, $e >>); + $F -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, << Acc/binary, $f >>); + $G -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, << Acc/binary, $g >>); + $H -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, << Acc/binary, $h >>); + $I -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, << Acc/binary, $i >>); + $J -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, << Acc/binary, $j >>); + $K -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, << Acc/binary, $k >>); + $L -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, << Acc/binary, $l >>); + $M -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, << Acc/binary, $m >>); + $N -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, << Acc/binary, $n >>); + $O -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, << Acc/binary, $o >>); + $P -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, << Acc/binary, $p >>); + $Q -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, << Acc/binary, $q >>); + $R -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, << Acc/binary, $r >>); + $S -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, << Acc/binary, $s >>); + $T -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, << Acc/binary, $t 
>>); + $U -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, << Acc/binary, $u >>); + $V -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, << Acc/binary, $v >>); + $W -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, << Acc/binary, $w >>); + $X -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, << Acc/binary, $x >>); + $Y -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, << Acc/binary, $y >>); + $Z -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, << Acc/binary, $z >>); + C -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, << Acc/binary, C >>) +end). + +-define(LOWER(Function, Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, Acc), case C of + $A -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, << Acc/binary, $a >>); + $B -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, << Acc/binary, $b >>); + $C -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, << Acc/binary, $c >>); + $D -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, << Acc/binary, $d >>); + $E -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, << Acc/binary, $e >>); + $F -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, << Acc/binary, $f >>); + $G -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, << Acc/binary, $g >>); + $H -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, << Acc/binary, $h >>); + $I -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, << Acc/binary, $i >>); + $J -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, << Acc/binary, $j >>); + $K -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, << Acc/binary, $k >>); + $L -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, << Acc/binary, $l >>); + $M -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, << Acc/binary, $m >>); + $N -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, << Acc/binary, $n >>); + $O -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, << Acc/binary, $o >>); + $P -> Function(Rest, A0, A1, A2, 
A3, A4, A5, A6, A7, A8, A9, << Acc/binary, $p >>); + $Q -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, << Acc/binary, $q >>); + $R -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, << Acc/binary, $r >>); + $S -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, << Acc/binary, $s >>); + $T -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, << Acc/binary, $t >>); + $U -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, << Acc/binary, $u >>); + $V -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, << Acc/binary, $v >>); + $W -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, << Acc/binary, $w >>); + $X -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, << Acc/binary, $x >>); + $Y -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, << Acc/binary, $y >>); + $Z -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, << Acc/binary, $z >>); + C -> Function(Rest, A0, A1, A2, A3, A4, A5, A6, A7, A8, A9, << Acc/binary, C >>) +end). + +-endif. diff --git a/include/cow_parse.hrl b/include/cow_parse.hrl new file mode 100644 index 0000000..5bbf8a3 --- /dev/null +++ b/include/cow_parse.hrl @@ -0,0 +1,79 @@ +%% Copyright (c) 2015, Loïc Hoguin <[email protected]> +%% +%% Permission to use, copy, modify, and/or distribute this software for any +%% purpose with or without fee is hereby granted, provided that the above +%% copyright notice and this permission notice appear in all copies. +%% +%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ +-ifndef(COW_PARSE_HRL). +-define(COW_PARSE_HRL, 1). + +-define(IS_ALPHA(C), + (C =:= $a) or (C =:= $b) or (C =:= $c) or (C =:= $d) or (C =:= $e) or + (C =:= $f) or (C =:= $g) or (C =:= $h) or (C =:= $i) or (C =:= $j) or + (C =:= $k) or (C =:= $l) or (C =:= $m) or (C =:= $n) or (C =:= $o) or + (C =:= $p) or (C =:= $q) or (C =:= $r) or (C =:= $s) or (C =:= $t) or + (C =:= $u) or (C =:= $v) or (C =:= $w) or (C =:= $x) or (C =:= $y) or + (C =:= $z) or + (C =:= $A) or (C =:= $B) or (C =:= $C) or (C =:= $D) or (C =:= $E) or + (C =:= $F) or (C =:= $G) or (C =:= $H) or (C =:= $I) or (C =:= $J) or + (C =:= $K) or (C =:= $L) or (C =:= $M) or (C =:= $N) or (C =:= $O) or + (C =:= $P) or (C =:= $Q) or (C =:= $R) or (C =:= $S) or (C =:= $T) or + (C =:= $U) or (C =:= $V) or (C =:= $W) or (C =:= $X) or (C =:= $Y) or + (C =:= $Z) +). + +-define(IS_ALPHANUM(C), ?IS_ALPHA(C) or ?IS_DIGIT(C)). +-define(IS_CHAR(C), C > 0, C < 128). + +-define(IS_DIGIT(C), + (C =:= $0) or (C =:= $1) or (C =:= $2) or (C =:= $3) or (C =:= $4) or + (C =:= $5) or (C =:= $6) or (C =:= $7) or (C =:= $8) or (C =:= $9)). + +-define(IS_ETAGC(C), C =:= 16#21; C >= 16#23, C =/= 16#7f). + +-define(IS_HEX(C), + ?IS_DIGIT(C) or + (C =:= $a) or (C =:= $b) or (C =:= $c) or + (C =:= $d) or (C =:= $e) or (C =:= $f) or + (C =:= $A) or (C =:= $B) or (C =:= $C) or + (C =:= $D) or (C =:= $E) or (C =:= $F)). + +-define(IS_LHEX(C), + ?IS_DIGIT(C) or + (C =:= $a) or (C =:= $b) or (C =:= $c) or + (C =:= $d) or (C =:= $e) or (C =:= $f)). + +-define(IS_TOKEN(C), + ?IS_ALPHA(C) or ?IS_DIGIT(C) or + (C =:= $!) or (C =:= $#) or (C =:= $$) or (C =:= $%) or (C =:= $&) or + (C =:= $') or (C =:= $*) or (C =:= $+) or (C =:= $-) or (C =:= $.) or + (C =:= $^) or (C =:= $_) or (C =:= $`) or (C =:= $|) or (C =:= $~)). + +-define(IS_TOKEN68(C), + ?IS_ALPHA(C) or ?IS_DIGIT(C) or + (C =:= $-) or (C =:= $.) or (C =:= $_) or + (C =:= $~) or (C =:= $+) or (C =:= $/)). 
+ +-define(IS_URI_UNRESERVED(C), + ?IS_ALPHA(C) or ?IS_DIGIT(C) or + (C =:= $-) or (C =:= $.) or (C =:= $_) or (C =:= $~)). + +-define(IS_URI_SUB_DELIMS(C), + (C =:= $!) or (C =:= $$) or (C =:= $&) or (C =:= $') or + (C =:= $() or (C =:= $)) or (C =:= $*) or (C =:= $+) or + (C =:= $,) or (C =:= $;) or (C =:= $=)). + +-define(IS_VCHAR(C), C =:= $\t; C > 31, C < 127). +-define(IS_VCHAR_OBS(C), C =:= $\t; C > 31, C =/= 127). +-define(IS_WS(C), (C =:= $\s) or (C =:= $\t)). +-define(IS_WS_COMMA(C), ?IS_WS(C) or (C =:= $,)). + +-endif. diff --git a/src/cow_cookie.erl b/src/cow_cookie.erl index 2ee0a19..150efeb 100644 --- a/src/cow_cookie.erl +++ b/src/cow_cookie.erl @@ -1,4 +1,4 @@ -%% Copyright (c) 2013, Loïc Hoguin <[email protected]> +%% Copyright (c) 2013-2015, Loïc Hoguin <[email protected]> %% %% Permission to use, copy, modify, and/or distribute this software for any %% purpose with or without fee is hereby granted, provided that the above @@ -80,8 +80,6 @@ parse_cookie_name(<< C, Rest/binary >>, Acc, Name) -> parse_cookie_value(<<>>, Acc, Name, Value) -> lists:reverse([{Name, parse_cookie_trim(Value)}|Acc]); -parse_cookie_value(<< $,, Rest/binary >>, Acc, Name, Value) -> - parse_cookie(Rest, [{Name, parse_cookie_trim(Value)}|Acc]); parse_cookie_value(<< $;, Rest/binary >>, Acc, Name, Value) -> parse_cookie(Rest, [{Name, parse_cookie_trim(Value)}|Acc]); parse_cookie_value(<< $\t, _/binary >>, _, _, _) -> @@ -130,6 +128,24 @@ parse_cookie_test_() -> %% Space in value. {<<"foo=Thu Jul 11 2013 15:38:43 GMT+0400 (MSK)">>, [{<<"foo">>, <<"Thu Jul 11 2013 15:38:43 GMT+0400 (MSK)">>}]}, + %% Comma in value. Google Analytics sets that kind of cookies. 
+ {<<"refk=sOUZDzq2w2; sk=B602064E0139D842D620C7569640DBB4C81C45080651" + "9CC124EF794863E10E80; __utma=64249653.825741573.1380181332.1400" + "015657.1400019557.703; __utmb=64249653.1.10.1400019557; __utmc=" + "64249653; __utmz=64249653.1400019557.703.13.utmcsr=bluesky.chic" + "agotribune.com|utmccn=(referral)|utmcmd=referral|utmcct=/origin" + "als/chi-12-indispensable-digital-tools-bsi,0,0.storygallery">>, [ + {<<"refk">>, <<"sOUZDzq2w2">>}, + {<<"sk">>, <<"B602064E0139D842D620C7569640DBB4C81C45080651" + "9CC124EF794863E10E80">>}, + {<<"__utma">>, <<"64249653.825741573.1380181332.1400" + "015657.1400019557.703">>}, + {<<"__utmb">>, <<"64249653.1.10.1400019557">>}, + {<<"__utmc">>, <<"64249653">>}, + {<<"__utmz">>, <<"64249653.1400019557.703.13.utmcsr=bluesky.chic" + "agotribune.com|utmccn=(referral)|utmcmd=referral|utmcct=/origin" + "als/chi-12-indispensable-digital-tools-bsi,0,0.storygallery">>} + ]}, %% Potential edge cases (initially from Mochiweb). {<<"foo=\\x">>, [{<<"foo">>, <<"\\x">>}]}, {<<"=">>, {error, badarg}}, @@ -139,9 +155,8 @@ parse_cookie_test_() -> {<<"foo=\\\";;bar=good ">>, [{<<"foo">>, <<"\\\"">>}, {<<"bar">>, <<"good">>}]}, {<<"foo=\"\\\";bar">>, {error, badarg}}, - {<<>>, []}, - {<<"foo=bar , baz=wibble ">>, - [{<<"foo">>, <<"bar">>}, {<<"baz">>, <<"wibble">>}]} + {<<>>, []}, %% Flash player. + {<<"foo=bar , baz=wibble ">>, [{<<"foo">>, <<"bar , baz=wibble">>}]} ], [{V, fun() -> R = parse_cookie(V) end} || {V, R} <- Tests]. -endif. diff --git a/src/cow_date.erl b/src/cow_date.erl index a9641df..b805aec 100644 --- a/src/cow_date.erl +++ b/src/cow_date.erl @@ -1,4 +1,4 @@ -%% Copyright (c) 2013, Loïc Hoguin <[email protected]> +%% Copyright (c) 2013-2015, Loïc Hoguin <[email protected]> %% %% Permission to use, copy, modify, and/or distribute this software for any %% purpose with or without fee is hereby granted, provided that the above @@ -14,8 +14,191 @@ -module(cow_date). +-export([parse_date/1]). -export([rfc2109/1]). +-ifdef(TEST). 
+-include_lib("triq/include/triq.hrl"). +-endif. + +%% @doc Parse the HTTP date (IMF-fixdate, rfc850, asctime). + +-define(DIGITS(A, B), ((A - $0) * 10 + (B - $0))). +-define(DIGITS(A, B, C, D), ((A - $0) * 1000 + (B - $0) * 100 + (C - $0) * 10 + (D - $0))). + +-spec parse_date(binary()) -> calendar:datetime(). +parse_date(DateBin) -> + Date = {{_, _, D}, {H, M, S}} = http_date(DateBin), + true = D >= 0 andalso D =< 31, + true = H >= 0 andalso H =< 23, + true = M >= 0 andalso M =< 59, + true = S >= 0 andalso S =< 60, %% Leap second. + Date. + +http_date(<<"Mon, ", D1, D2, " ", R/bits >>) -> fixdate(R, ?DIGITS(D1, D2)); +http_date(<<"Tue, ", D1, D2, " ", R/bits >>) -> fixdate(R, ?DIGITS(D1, D2)); +http_date(<<"Wed, ", D1, D2, " ", R/bits >>) -> fixdate(R, ?DIGITS(D1, D2)); +http_date(<<"Thu, ", D1, D2, " ", R/bits >>) -> fixdate(R, ?DIGITS(D1, D2)); +http_date(<<"Fri, ", D1, D2, " ", R/bits >>) -> fixdate(R, ?DIGITS(D1, D2)); +http_date(<<"Sat, ", D1, D2, " ", R/bits >>) -> fixdate(R, ?DIGITS(D1, D2)); +http_date(<<"Sun, ", D1, D2, " ", R/bits >>) -> fixdate(R, ?DIGITS(D1, D2)); +http_date(<<"Monday, ", D1, D2, "-", R/bits >>) -> rfc850_date(R, ?DIGITS(D1, D2)); +http_date(<<"Tuesday, ", D1, D2, "-", R/bits >>) -> rfc850_date(R, ?DIGITS(D1, D2)); +http_date(<<"Wednesday, ", D1, D2, "-", R/bits >>) -> rfc850_date(R, ?DIGITS(D1, D2)); +http_date(<<"Thursday, ", D1, D2, "-", R/bits >>) -> rfc850_date(R, ?DIGITS(D1, D2)); +http_date(<<"Friday, ", D1, D2, "-", R/bits >>) -> rfc850_date(R, ?DIGITS(D1, D2)); +http_date(<<"Saturday, ", D1, D2, "-", R/bits >>) -> rfc850_date(R, ?DIGITS(D1, D2)); +http_date(<<"Sunday, ", D1, D2, "-", R/bits >>) -> rfc850_date(R, ?DIGITS(D1, D2)); +http_date(<<"Mon ", R/bits >>) -> asctime_date(R); +http_date(<<"Tue ", R/bits >>) -> asctime_date(R); +http_date(<<"Wed ", R/bits >>) -> asctime_date(R); +http_date(<<"Thu ", R/bits >>) -> asctime_date(R); +http_date(<<"Fri ", R/bits >>) -> asctime_date(R); +http_date(<<"Sat ", R/bits >>) -> 
asctime_date(R); +http_date(<<"Sun ", R/bits >>) -> asctime_date(R). + +fixdate(<<"Jan ", Y1, Y2, Y3, Y4, " ", H1, H2, ":", M1, M2, ":", S1, S2, " GMT">>, Day) -> + {{?DIGITS(Y1, Y2, Y3, Y4), 1, Day}, {?DIGITS(H1, H2), ?DIGITS(M1, M2), ?DIGITS(S1, S2)}}; +fixdate(<<"Feb ", Y1, Y2, Y3, Y4, " ", H1, H2, ":", M1, M2, ":", S1, S2, " GMT">>, Day) -> + {{?DIGITS(Y1, Y2, Y3, Y4), 2, Day}, {?DIGITS(H1, H2), ?DIGITS(M1, M2), ?DIGITS(S1, S2)}}; +fixdate(<<"Mar ", Y1, Y2, Y3, Y4, " ", H1, H2, ":", M1, M2, ":", S1, S2, " GMT">>, Day) -> + {{?DIGITS(Y1, Y2, Y3, Y4), 3, Day}, {?DIGITS(H1, H2), ?DIGITS(M1, M2), ?DIGITS(S1, S2)}}; +fixdate(<<"Apr ", Y1, Y2, Y3, Y4, " ", H1, H2, ":", M1, M2, ":", S1, S2, " GMT">>, Day) -> + {{?DIGITS(Y1, Y2, Y3, Y4), 4, Day}, {?DIGITS(H1, H2), ?DIGITS(M1, M2), ?DIGITS(S1, S2)}}; +fixdate(<<"May ", Y1, Y2, Y3, Y4, " ", H1, H2, ":", M1, M2, ":", S1, S2, " GMT">>, Day) -> + {{?DIGITS(Y1, Y2, Y3, Y4), 5, Day}, {?DIGITS(H1, H2), ?DIGITS(M1, M2), ?DIGITS(S1, S2)}}; +fixdate(<<"Jun ", Y1, Y2, Y3, Y4, " ", H1, H2, ":", M1, M2, ":", S1, S2, " GMT">>, Day) -> + {{?DIGITS(Y1, Y2, Y3, Y4), 6, Day}, {?DIGITS(H1, H2), ?DIGITS(M1, M2), ?DIGITS(S1, S2)}}; +fixdate(<<"Jul ", Y1, Y2, Y3, Y4, " ", H1, H2, ":", M1, M2, ":", S1, S2, " GMT">>, Day) -> + {{?DIGITS(Y1, Y2, Y3, Y4), 7, Day}, {?DIGITS(H1, H2), ?DIGITS(M1, M2), ?DIGITS(S1, S2)}}; +fixdate(<<"Aug ", Y1, Y2, Y3, Y4, " ", H1, H2, ":", M1, M2, ":", S1, S2, " GMT">>, Day) -> + {{?DIGITS(Y1, Y2, Y3, Y4), 8, Day}, {?DIGITS(H1, H2), ?DIGITS(M1, M2), ?DIGITS(S1, S2)}}; +fixdate(<<"Sep ", Y1, Y2, Y3, Y4, " ", H1, H2, ":", M1, M2, ":", S1, S2, " GMT">>, Day) -> + {{?DIGITS(Y1, Y2, Y3, Y4), 9, Day}, {?DIGITS(H1, H2), ?DIGITS(M1, M2), ?DIGITS(S1, S2)}}; +fixdate(<<"Oct ", Y1, Y2, Y3, Y4, " ", H1, H2, ":", M1, M2, ":", S1, S2, " GMT">>, Day) -> + {{?DIGITS(Y1, Y2, Y3, Y4), 10, Day}, {?DIGITS(H1, H2), ?DIGITS(M1, M2), ?DIGITS(S1, S2)}}; +fixdate(<<"Nov ", Y1, Y2, Y3, Y4, " ", H1, H2, ":", M1, M2, ":", S1, S2, " GMT">>, 
Day) -> + {{?DIGITS(Y1, Y2, Y3, Y4), 11, Day}, {?DIGITS(H1, H2), ?DIGITS(M1, M2), ?DIGITS(S1, S2)}}; +fixdate(<<"Dec ", Y1, Y2, Y3, Y4, " ", H1, H2, ":", M1, M2, ":", S1, S2, " GMT">>, Day) -> + {{?DIGITS(Y1, Y2, Y3, Y4), 12, Day}, {?DIGITS(H1, H2), ?DIGITS(M1, M2), ?DIGITS(S1, S2)}}. + +rfc850_date(<<"Jan-", Y1, Y2, " ", H1, H2, ":", M1, M2, ":", S1, S2, " GMT">>, Day) -> + {{rfc850_year(?DIGITS(Y1, Y2)), 1, Day}, {?DIGITS(H1, H2), ?DIGITS(M1, M2), ?DIGITS(S1, S2)}}; +rfc850_date(<<"Feb-", Y1, Y2, " ", H1, H2, ":", M1, M2, ":", S1, S2, " GMT">>, Day) -> + {{rfc850_year(?DIGITS(Y1, Y2)), 2, Day}, {?DIGITS(H1, H2), ?DIGITS(M1, M2), ?DIGITS(S1, S2)}}; +rfc850_date(<<"Mar-", Y1, Y2, " ", H1, H2, ":", M1, M2, ":", S1, S2, " GMT">>, Day) -> + {{rfc850_year(?DIGITS(Y1, Y2)), 3, Day}, {?DIGITS(H1, H2), ?DIGITS(M1, M2), ?DIGITS(S1, S2)}}; +rfc850_date(<<"Apr-", Y1, Y2, " ", H1, H2, ":", M1, M2, ":", S1, S2, " GMT">>, Day) -> + {{rfc850_year(?DIGITS(Y1, Y2)), 4, Day}, {?DIGITS(H1, H2), ?DIGITS(M1, M2), ?DIGITS(S1, S2)}}; +rfc850_date(<<"May-", Y1, Y2, " ", H1, H2, ":", M1, M2, ":", S1, S2, " GMT">>, Day) -> + {{rfc850_year(?DIGITS(Y1, Y2)), 5, Day}, {?DIGITS(H1, H2), ?DIGITS(M1, M2), ?DIGITS(S1, S2)}}; +rfc850_date(<<"Jun-", Y1, Y2, " ", H1, H2, ":", M1, M2, ":", S1, S2, " GMT">>, Day) -> + {{rfc850_year(?DIGITS(Y1, Y2)), 6, Day}, {?DIGITS(H1, H2), ?DIGITS(M1, M2), ?DIGITS(S1, S2)}}; +rfc850_date(<<"Jul-", Y1, Y2, " ", H1, H2, ":", M1, M2, ":", S1, S2, " GMT">>, Day) -> + {{rfc850_year(?DIGITS(Y1, Y2)), 7, Day}, {?DIGITS(H1, H2), ?DIGITS(M1, M2), ?DIGITS(S1, S2)}}; +rfc850_date(<<"Aug-", Y1, Y2, " ", H1, H2, ":", M1, M2, ":", S1, S2, " GMT">>, Day) -> + {{rfc850_year(?DIGITS(Y1, Y2)), 8, Day}, {?DIGITS(H1, H2), ?DIGITS(M1, M2), ?DIGITS(S1, S2)}}; +rfc850_date(<<"Sep-", Y1, Y2, " ", H1, H2, ":", M1, M2, ":", S1, S2, " GMT">>, Day) -> + {{rfc850_year(?DIGITS(Y1, Y2)), 9, Day}, {?DIGITS(H1, H2), ?DIGITS(M1, M2), ?DIGITS(S1, S2)}}; +rfc850_date(<<"Oct-", Y1, Y2, " ", H1, H2, 
":", M1, M2, ":", S1, S2, " GMT">>, Day) -> + {{rfc850_year(?DIGITS(Y1, Y2)), 10, Day}, {?DIGITS(H1, H2), ?DIGITS(M1, M2), ?DIGITS(S1, S2)}}; +rfc850_date(<<"Nov-", Y1, Y2, " ", H1, H2, ":", M1, M2, ":", S1, S2, " GMT">>, Day) -> + {{rfc850_year(?DIGITS(Y1, Y2)), 11, Day}, {?DIGITS(H1, H2), ?DIGITS(M1, M2), ?DIGITS(S1, S2)}}; +rfc850_date(<<"Dec-", Y1, Y2, " ", H1, H2, ":", M1, M2, ":", S1, S2, " GMT">>, Day) -> + {{rfc850_year(?DIGITS(Y1, Y2)), 12, Day}, {?DIGITS(H1, H2), ?DIGITS(M1, M2), ?DIGITS(S1, S2)}}. + +rfc850_year(Y) when Y > 50 -> Y + 1900; +rfc850_year(Y) -> Y + 2000. + +asctime_date(<<"Jan ", D1, D2, " ", H1, H2, ":", M1, M2, ":", S1, S2, " ", Y1, Y2, Y3, Y4 >>) -> + {{?DIGITS(Y1, Y2, Y3, Y4), 1, asctime_day(D1, D2)}, {?DIGITS(H1, H2), ?DIGITS(M1, M2), ?DIGITS(S1, S2)}}; +asctime_date(<<"Feb ", D1, D2, " ", H1, H2, ":", M1, M2, ":", S1, S2, " ", Y1, Y2, Y3, Y4 >>) -> + {{?DIGITS(Y1, Y2, Y3, Y4), 2, asctime_day(D1, D2)}, {?DIGITS(H1, H2), ?DIGITS(M1, M2), ?DIGITS(S1, S2)}}; +asctime_date(<<"Mar ", D1, D2, " ", H1, H2, ":", M1, M2, ":", S1, S2, " ", Y1, Y2, Y3, Y4 >>) -> + {{?DIGITS(Y1, Y2, Y3, Y4), 3, asctime_day(D1, D2)}, {?DIGITS(H1, H2), ?DIGITS(M1, M2), ?DIGITS(S1, S2)}}; +asctime_date(<<"Apr ", D1, D2, " ", H1, H2, ":", M1, M2, ":", S1, S2, " ", Y1, Y2, Y3, Y4 >>) -> + {{?DIGITS(Y1, Y2, Y3, Y4), 4, asctime_day(D1, D2)}, {?DIGITS(H1, H2), ?DIGITS(M1, M2), ?DIGITS(S1, S2)}}; +asctime_date(<<"May ", D1, D2, " ", H1, H2, ":", M1, M2, ":", S1, S2, " ", Y1, Y2, Y3, Y4 >>) -> + {{?DIGITS(Y1, Y2, Y3, Y4), 5, asctime_day(D1, D2)}, {?DIGITS(H1, H2), ?DIGITS(M1, M2), ?DIGITS(S1, S2)}}; +asctime_date(<<"Jun ", D1, D2, " ", H1, H2, ":", M1, M2, ":", S1, S2, " ", Y1, Y2, Y3, Y4 >>) -> + {{?DIGITS(Y1, Y2, Y3, Y4), 6, asctime_day(D1, D2)}, {?DIGITS(H1, H2), ?DIGITS(M1, M2), ?DIGITS(S1, S2)}}; +asctime_date(<<"Jul ", D1, D2, " ", H1, H2, ":", M1, M2, ":", S1, S2, " ", Y1, Y2, Y3, Y4 >>) -> + {{?DIGITS(Y1, Y2, Y3, Y4), 7, asctime_day(D1, D2)}, {?DIGITS(H1, H2), 
?DIGITS(M1, M2), ?DIGITS(S1, S2)}}; +asctime_date(<<"Aug ", D1, D2, " ", H1, H2, ":", M1, M2, ":", S1, S2, " ", Y1, Y2, Y3, Y4 >>) -> + {{?DIGITS(Y1, Y2, Y3, Y4), 8, asctime_day(D1, D2)}, {?DIGITS(H1, H2), ?DIGITS(M1, M2), ?DIGITS(S1, S2)}}; +asctime_date(<<"Sep ", D1, D2, " ", H1, H2, ":", M1, M2, ":", S1, S2, " ", Y1, Y2, Y3, Y4 >>) -> + {{?DIGITS(Y1, Y2, Y3, Y4), 9, asctime_day(D1, D2)}, {?DIGITS(H1, H2), ?DIGITS(M1, M2), ?DIGITS(S1, S2)}}; +asctime_date(<<"Oct ", D1, D2, " ", H1, H2, ":", M1, M2, ":", S1, S2, " ", Y1, Y2, Y3, Y4 >>) -> + {{?DIGITS(Y1, Y2, Y3, Y4), 10, asctime_day(D1, D2)}, {?DIGITS(H1, H2), ?DIGITS(M1, M2), ?DIGITS(S1, S2)}}; +asctime_date(<<"Nov ", D1, D2, " ", H1, H2, ":", M1, M2, ":", S1, S2, " ", Y1, Y2, Y3, Y4 >>) -> + {{?DIGITS(Y1, Y2, Y3, Y4), 11, asctime_day(D1, D2)}, {?DIGITS(H1, H2), ?DIGITS(M1, M2), ?DIGITS(S1, S2)}}; +asctime_date(<<"Dec ", D1, D2, " ", H1, H2, ":", M1, M2, ":", S1, S2, " ", Y1, Y2, Y3, Y4 >>) -> + {{?DIGITS(Y1, Y2, Y3, Y4), 12, asctime_day(D1, D2)}, {?DIGITS(H1, H2), ?DIGITS(M1, M2), ?DIGITS(S1, S2)}}. + +asctime_day($\s, D2) -> (D2 - $0); +asctime_day(D1, D2) -> (D1 - $0) * 10 + (D2 - $0). + +-ifdef(TEST). +day_name() -> oneof(["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]). +day_name_l() -> oneof(["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]). +year() -> int(1951, 2050). +month() -> int(1, 12). +day() -> int(1, 31). +hour() -> int(23). +minute() -> int(59). +second() -> int(60). + +fixdate_gen() -> + ?LET({DayName, Y, Mo, D, H, Mi, S}, + {day_name(), year(), month(), day(), hour(), minute(), second()}, + {{{Y, Mo, D}, {H, Mi, S}}, + list_to_binary([DayName, ", ", pad_int(D), " ", month(Mo), " ", integer_to_binary(Y), + " ", pad_int(H), ":", pad_int(Mi), ":", pad_int(S), " GMT"])}). 
+ +rfc850_gen() -> + ?LET({DayName, Y, Mo, D, H, Mi, S}, + {day_name_l(), year(), month(), day(), hour(), minute(), second()}, + {{{Y, Mo, D}, {H, Mi, S}}, + list_to_binary([DayName, ", ", pad_int(D), "-", month(Mo), "-", pad_int(Y rem 100), + " ", pad_int(H), ":", pad_int(Mi), ":", pad_int(S), " GMT"])}). + +asctime_gen() -> + ?LET({DayName, Y, Mo, D, H, Mi, S}, + {day_name(), year(), month(), day(), hour(), minute(), second()}, + {{{Y, Mo, D}, {H, Mi, S}}, + list_to_binary([DayName, " ", month(Mo), " ", + if D < 10 -> << $\s, (D + $0) >>; true -> integer_to_binary(D) end, + " ", pad_int(H), ":", pad_int(Mi), ":", pad_int(S), " ", integer_to_binary(Y)])}). + +prop_http_date() -> + ?FORALL({Date, DateBin}, + oneof([fixdate_gen(), rfc850_gen(), asctime_gen()]), + Date =:= parse_date(DateBin)). + +http_date_test_() -> + Tests = [ + {<<"Sun, 06 Nov 1994 08:49:37 GMT">>, {{1994, 11, 6}, {8, 49, 37}}}, + {<<"Sunday, 06-Nov-94 08:49:37 GMT">>, {{1994, 11, 6}, {8, 49, 37}}}, + {<<"Sun Nov 6 08:49:37 1994">>, {{1994, 11, 6}, {8, 49, 37}}} + ], + [{V, fun() -> R = http_date(V) end} || {V, R} <- Tests]. + +horse_http_date_fixdate() -> + horse:repeat(200000, + http_date(<<"Sun, 06 Nov 1994 08:49:37 GMT">>) + ). + +horse_http_date_rfc850() -> + horse:repeat(200000, + http_date(<<"Sunday, 06-Nov-94 08:49:37 GMT">>) + ). + +horse_http_date_asctime() -> + horse:repeat(200000, + http_date(<<"Sun Nov 6 08:49:37 1994">>) + ). +-endif. + %% @doc Return the date formatted according to RFC2109. -spec rfc2109(calendar:datetime()) -> binary(). @@ -36,9 +219,7 @@ rfc2109_test_() -> {<<"Sun, 01-Jan-2012 00:00:00 GMT">>, {{2012, 1, 1}, { 0, 0, 0}}} ], [{R, fun() -> R = rfc2109(D) end} || {R, D} <- Tests]. --endif. --ifdef(PERF). horse_rfc2019_20130101_000000() -> horse:repeat(100000, rfc2109({{2013, 1, 1}, {0, 0, 0}}) @@ -117,7 +298,9 @@ pad_int(55) -> <<"55">>; pad_int(56) -> <<"56">>; pad_int(57) -> <<"57">>; pad_int(58) -> <<"58">>; -pad_int(59) -> <<"59">>. 
+pad_int(59) -> <<"59">>; +pad_int(60) -> <<"60">>; +pad_int(Int) -> integer_to_binary(Int). -spec weekday(1..7) -> <<_:24>>. weekday(1) -> <<"Mon">>; @@ -203,4 +386,4 @@ year(2026) -> <<"2026">>; year(2027) -> <<"2027">>; year(2028) -> <<"2028">>; year(2029) -> <<"2029">>; -year(Year) -> list_to_binary(integer_to_list(Year)). +year(Year) -> integer_to_binary(Year). diff --git a/src/cow_http.erl b/src/cow_http.erl index 5e8fa22..8f2ae92 100644 --- a/src/cow_http.erl +++ b/src/cow_http.erl @@ -1,4 +1,4 @@ -%% Copyright (c) 2013, Loïc Hoguin <[email protected]> +%% Copyright (c) 2013-2015, Loïc Hoguin <[email protected]> %% %% Permission to use, copy, modify, and/or distribute this software for any %% purpose with or without fee is hereby granted, provided that the above @@ -14,71 +14,185 @@ -module(cow_http). --export([parse_fullhost/1]). +%% @todo parse_request_line +-export([parse_status_line/1]). +-export([parse_headers/1]). + -export([parse_fullpath/1]). -export([parse_version/1]). -%% @doc Extract host and port from a binary. -%% -%% Because the hostname is case insensitive it is converted -%% to lowercase. - --spec parse_fullhost(binary()) -> {binary(), undefined | non_neg_integer()}. -parse_fullhost(Fullhost) -> - parse_fullhost(Fullhost, false, <<>>). - -parse_fullhost(<< $[, Rest/bits >>, false, <<>>) -> - parse_fullhost(Rest, true, << $[ >>); -parse_fullhost(<<>>, false, Acc) -> - {Acc, undefined}; -parse_fullhost(<< $:, Rest/bits >>, false, Acc) -> - {Acc, list_to_integer(binary_to_list(Rest))}; -parse_fullhost(<< $], Rest/bits >>, true, Acc) -> - parse_fullhost(Rest, false, << Acc/binary, $] >>); -parse_fullhost(<< C, Rest/bits >>, E, Acc) -> +-export([request/4]). +-export([version/1]). + +-type version() :: 'HTTP/1.0' | 'HTTP/1.1'. +-type status() :: 100..999. +-type headers() :: [{binary(), iodata()}]. + +-include("cow_inline.hrl"). + +%% @doc Parse the status line. + +-spec parse_status_line(binary()) -> {version(), status(), binary(), binary()}. 
+parse_status_line(<< "HTTP/1.1 200 OK\r\n", Rest/bits >>) -> + {'HTTP/1.1', 200, <<"OK">>, Rest}; +parse_status_line(<< "HTTP/1.1 404 Not Found\r\n", Rest/bits >>) -> + {'HTTP/1.1', 404, <<"Not Found">>, Rest}; +parse_status_line(<< "HTTP/1.1 500 Internal Server Error\r\n", Rest/bits >>) -> + {'HTTP/1.1', 500, <<"Internal Server Error">>, Rest}; +parse_status_line(<< "HTTP/1.1 ", Status/bits >>) -> + parse_status_line(Status, 'HTTP/1.1'); +parse_status_line(<< "HTTP/1.0 ", Status/bits >>) -> + parse_status_line(Status, 'HTTP/1.0'). + +parse_status_line(<< H, T, U, " ", Rest/bits >>, Version) + when $0 =< H, H =< $9, $0 =< T, T =< $9, $0 =< U, U =< $9 -> + Status = (H - $0) * 100 + (T - $0) * 10 + (U - $0), + {Pos, _} = binary:match(Rest, <<"\r">>), + << StatusStr:Pos/binary, "\r\n", Rest2/bits >> = Rest, + {Version, Status, StatusStr, Rest2}. + +-ifdef(TEST). +parse_status_line_test_() -> + Tests = [ + {<<"HTTP/1.1 200 OK\r\nRest">>, + {'HTTP/1.1', 200, <<"OK">>, <<"Rest">>}}, + {<<"HTTP/1.0 404 Not Found\r\nRest">>, + {'HTTP/1.0', 404, <<"Not Found">>, <<"Rest">>}}, + {<<"HTTP/1.1 500 Something very funny here\r\nRest">>, + {'HTTP/1.1', 500, <<"Something very funny here">>, <<"Rest">>}}, + {<<"HTTP/1.1 200 \r\nRest">>, + {'HTTP/1.1', 200, <<>>, <<"Rest">>}} + ], + [{V, fun() -> R = parse_status_line(V) end} + || {V, R} <- Tests]. + +parse_status_line_error_test_() -> + Tests = [ + <<>>, + <<"HTTP/1.1">>, + <<"HTTP/1.1 200\r\n">>, + <<"HTTP/1.1 200 OK">>, + <<"HTTP/1.1 200 OK\r">>, + <<"HTTP/1.1 200 OK\n">>, + <<"HTTP/0.9 200 OK\r\n">>, + <<"HTTP/1.1 42 Answer\r\n">>, + <<"HTTP/1.1 999999999 More than OK\r\n">>, + <<"content-type: text/plain\r\n">>, + <<0:80, "\r\n">> + ], + [{V, fun() -> {'EXIT', _} = (catch parse_status_line(V)) end} + || V <- Tests]. + +horse_parse_status_line_200() -> + horse:repeat(200000, + parse_status_line(<<"HTTP/1.1 200 OK\r\n">>) + ). 
+ +horse_parse_status_line_404() -> + horse:repeat(200000, + parse_status_line(<<"HTTP/1.1 404 Not Found\r\n">>) + ). + +horse_parse_status_line_500() -> + horse:repeat(200000, + parse_status_line(<<"HTTP/1.1 500 Internal Server Error\r\n">>) + ). + +horse_parse_status_line_other() -> + horse:repeat(200000, + parse_status_line(<<"HTTP/1.1 416 Requested range not satisfiable\r\n">>) + ). +-endif. + +%% @doc Parse the list of headers. + +-spec parse_headers(binary()) -> {[{binary(), binary()}], binary()}. +parse_headers(Data) -> + parse_header(Data, []). + +parse_header(<< $\r, $\n, Rest/bits >>, Acc) -> + {lists:reverse(Acc), Rest}; +parse_header(Data, Acc) -> + parse_hd_name(Data, Acc, <<>>). + +parse_hd_name(<< C, Rest/bits >>, Acc, SoFar) -> case C of - $A -> parse_fullhost(Rest, E, << Acc/binary, $a >>); - $B -> parse_fullhost(Rest, E, << Acc/binary, $b >>); - $C -> parse_fullhost(Rest, E, << Acc/binary, $c >>); - $D -> parse_fullhost(Rest, E, << Acc/binary, $d >>); - $E -> parse_fullhost(Rest, E, << Acc/binary, $e >>); - $F -> parse_fullhost(Rest, E, << Acc/binary, $f >>); - $G -> parse_fullhost(Rest, E, << Acc/binary, $g >>); - $H -> parse_fullhost(Rest, E, << Acc/binary, $h >>); - $I -> parse_fullhost(Rest, E, << Acc/binary, $i >>); - $J -> parse_fullhost(Rest, E, << Acc/binary, $j >>); - $K -> parse_fullhost(Rest, E, << Acc/binary, $k >>); - $L -> parse_fullhost(Rest, E, << Acc/binary, $l >>); - $M -> parse_fullhost(Rest, E, << Acc/binary, $m >>); - $N -> parse_fullhost(Rest, E, << Acc/binary, $n >>); - $O -> parse_fullhost(Rest, E, << Acc/binary, $o >>); - $P -> parse_fullhost(Rest, E, << Acc/binary, $p >>); - $Q -> parse_fullhost(Rest, E, << Acc/binary, $q >>); - $R -> parse_fullhost(Rest, E, << Acc/binary, $r >>); - $S -> parse_fullhost(Rest, E, << Acc/binary, $s >>); - $T -> parse_fullhost(Rest, E, << Acc/binary, $t >>); - $U -> parse_fullhost(Rest, E, << Acc/binary, $u >>); - $V -> parse_fullhost(Rest, E, << Acc/binary, $v >>); - $W -> 
parse_fullhost(Rest, E, << Acc/binary, $w >>); - $X -> parse_fullhost(Rest, E, << Acc/binary, $x >>); - $Y -> parse_fullhost(Rest, E, << Acc/binary, $y >>); - $Z -> parse_fullhost(Rest, E, << Acc/binary, $z >>); - _ -> parse_fullhost(Rest, E, << Acc/binary, C >>) + $: -> parse_hd_before_value(Rest, Acc, SoFar); + $\s -> parse_hd_name_ws(Rest, Acc, SoFar); + $\t -> parse_hd_name_ws(Rest, Acc, SoFar); + _ -> ?LOWER(parse_hd_name, Rest, Acc, SoFar) end. +parse_hd_name_ws(<< C, Rest/bits >>, Acc, Name) -> + case C of + $: -> parse_hd_before_value(Rest, Acc, Name); + $\s -> parse_hd_name_ws(Rest, Acc, Name); + $\t -> parse_hd_name_ws(Rest, Acc, Name) + end. + +parse_hd_before_value(<< $\s, Rest/bits >>, Acc, Name) -> + parse_hd_before_value(Rest, Acc, Name); +parse_hd_before_value(<< $\t, Rest/bits >>, Acc, Name) -> + parse_hd_before_value(Rest, Acc, Name); +parse_hd_before_value(Data, Acc, Name) -> + parse_hd_value(Data, Acc, Name, <<>>). + +parse_hd_value(<< $\r, Rest/bits >>, Acc, Name, SoFar) -> + case Rest of + << $\n, C, Rest2/bits >> when C =:= $\s; C =:= $\t -> + parse_hd_value(Rest2, Acc, Name, << SoFar/binary, C >>); + << $\n, Rest2/bits >> -> + parse_header(Rest2, [{Name, SoFar}|Acc]) + end; +parse_hd_value(<< C, Rest/bits >>, Acc, Name, SoFar) -> + parse_hd_value(Rest, Acc, Name, << SoFar/binary, C >>). + -ifdef(TEST). -parse_fullhost_test() -> - {<<"example.org">>, 8080} = parse_fullhost(<<"example.org:8080">>), - {<<"example.org">>, undefined} = parse_fullhost(<<"example.org">>), - {<<"192.0.2.1">>, 8080} = parse_fullhost(<<"192.0.2.1:8080">>), - {<<"192.0.2.1">>, undefined} = parse_fullhost(<<"192.0.2.1">>), - {<<"[2001:db8::1]">>, 8080} = parse_fullhost(<<"[2001:db8::1]:8080">>), - {<<"[2001:db8::1]">>, undefined} = parse_fullhost(<<"[2001:db8::1]">>), - {<<"[::ffff:192.0.2.1]">>, 8080} - = parse_fullhost(<<"[::ffff:192.0.2.1]:8080">>), - {<<"[::ffff:192.0.2.1]">>, undefined} - = parse_fullhost(<<"[::ffff:192.0.2.1]">>), - ok. 
+parse_headers_test_() -> + Tests = [ + {<<"\r\nRest">>, + {[], <<"Rest">>}}, + {<<"Server: Erlang/R17\r\n" + "Date: Sun, 23 Feb 2014 09:30:39 GMT\r\n" + "Multiline-Header: why hello!\r\n" + " I didn't see you all the way over there!\r\n" + "Content-Length: 12\r\n" + "Content-Type: text/plain\r\n" + "\r\nRest">>, + {[{<<"server">>, <<"Erlang/R17">>}, + {<<"date">>, <<"Sun, 23 Feb 2014 09:30:39 GMT">>}, + {<<"multiline-header">>, + <<"why hello! I didn't see you all the way over there!">>}, + {<<"content-length">>, <<"12">>}, + {<<"content-type">>, <<"text/plain">>}], + <<"Rest">>}} + ], + [{V, fun() -> R = parse_headers(V) end} + || {V, R} <- Tests]. + +parse_headers_error_test_() -> + Tests = [ + <<>>, + <<"\r">>, + <<"Malformed\r\n\r\n">>, + <<"content-type: text/plain\r\nMalformed\r\n\r\n">>, + <<"HTTP/1.1 200 OK\r\n\r\n">>, + <<0:80, "\r\n\r\n">>, + <<"content-type: text/plain\r\ncontent-length: 12\r\n">> + ], + [{V, fun() -> {'EXIT', _} = (catch parse_headers(V)) end} + || V <- Tests]. + +horse_parse_headers() -> + horse:repeat(50000, + parse_headers(<<"Server: Erlang/R17\r\n" + "Date: Sun, 23 Feb 2014 09:30:39 GMT\r\n" + "Multiline-Header: why hello!\r\n" + " I didn't see you all the way over there!\r\n" + "Content-Length: 12\r\n" + "Content-Type: text/plain\r\n" + "\r\nRest">>) + ). -endif. %% @doc Extract path and query string from a binary. @@ -108,11 +222,9 @@ parse_fullpath_test() -> %% @doc Convert an HTTP version to atom. --spec parse_version(binary()) -> 'HTTP/1.1' | 'HTTP/1.0'. -parse_version(<<"HTTP/1.1">>) -> - 'HTTP/1.1'; -parse_version(<<"HTTP/1.0">>) -> - 'HTTP/1.0'. +-spec parse_version(binary()) -> version(). +parse_version(<<"HTTP/1.1">>) -> 'HTTP/1.1'; +parse_version(<<"HTTP/1.0">>) -> 'HTTP/1.0'. -ifdef(TEST). parse_version_test() -> @@ -121,3 +233,26 @@ parse_version_test() -> {'EXIT', _} = (catch parse_version(<<"HTTP/1.2">>)), ok. -endif. + +%% @doc Return formatted request-line and headers. 
+%% @todo Add tests when the corresponding reverse functions are added. + +-spec request(binary(), iodata(), version(), headers()) -> iodata(). +request(Method, Path, Version, Headers) -> + [Method, <<" ">>, Path, <<" ">>, version(Version), <<"\r\n">>, + [[N, <<": ">>, V, <<"\r\n">>] || {N, V} <- Headers], + <<"\r\n">>]. + +%% @doc Return the version as a binary. + +-spec version(version()) -> binary(). +version('HTTP/1.1') -> <<"HTTP/1.1">>; +version('HTTP/1.0') -> <<"HTTP/1.0">>. + +-ifdef(TEST). +version_test() -> + <<"HTTP/1.1">> = version('HTTP/1.1'), + <<"HTTP/1.0">> = version('HTTP/1.0'), + {'EXIT', _} = (catch version('HTTP/1.2')), + ok. +-endif. diff --git a/src/cow_http_hd.erl b/src/cow_http_hd.erl new file mode 100644 index 0000000..e47d80d --- /dev/null +++ b/src/cow_http_hd.erl @@ -0,0 +1,3019 @@ +%% Copyright (c) 2014-2015, Loïc Hoguin <[email protected]> +%% +%% Permission to use, copy, modify, and/or distribute this software for any +%% purpose with or without fee is hereby granted, provided that the above +%% copyright notice and this permission notice appear in all copies. +%% +%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +-module(cow_http_hd). + +-export([parse_accept/1]). +-export([parse_accept_charset/1]). +% @todo -export([parse_accept_datetime/1]). RFC7089 +-export([parse_accept_encoding/1]). +% @todo -export([parse_accept_features/1]). RFC2295 +-export([parse_accept_language/1]). +-export([parse_accept_ranges/1]). +% @todo -export([parse_access_control_allow_credentials/1]). 
CORS +% @todo -export([parse_access_control_allow_headers/1]). CORS +% @todo -export([parse_access_control_allow_methods/1]). CORS +% @todo -export([parse_access_control_allow_origin/1]). CORS +% @todo -export([parse_access_control_expose_headers/1]). CORS +% @todo -export([parse_access_control_max_age/1]). CORS +% @todo -export([parse_access_control_request_headers/1]). CORS +% @todo -export([parse_access_control_request_method/1]). CORS +-export([parse_age/1]). +-export([parse_allow/1]). +% @todo -export([parse_alternates/1]). RFC2295 +% @todo -export([parse_authentication_info/1]). RFC2617 +-export([parse_authorization/1]). +-export([parse_cache_control/1]). +-export([parse_connection/1]). +% @todo -export([parse_content_disposition/1]). RFC6266 +-export([parse_content_encoding/1]). +-export([parse_content_language/1]). +-export([parse_content_length/1]). +% @todo -export([parse_content_location/1]). RFC7231 +% @todo -export([parse_content_md5/1]). RFC2616 (deprecated) +-export([parse_content_range/1]). +% @todo -export([parse_content_security_policy/1]). CSP +% @todo -export([parse_content_security_policy_report_only/1]). CSP +-export([parse_content_type/1]). +% @todo -export([parse_cookie/1]). RFC6265 +-export([parse_date/1]). +% @todo -export([parse_digest/1]). RFC3230 +% @todo -export([parse_dnt/1]). http://donottrack.us/ +-export([parse_etag/1]). +-export([parse_expect/1]). +-export([parse_expires/1]). +% @todo -export([parse_forwarded/1]). RFC7239 +% @todo -export([parse_from/1]). RFC7231 +-export([parse_host/1]). +% @todo -export([parse_http2_settings/1]). HTTP/2 (upcoming) +-export([parse_if_match/1]). +-export([parse_if_modified_since/1]). +-export([parse_if_none_match/1]). +-export([parse_if_range/1]). +-export([parse_if_unmodified_since/1]). +% @todo -export([parse_last_event_id/1]). eventsource +-export([parse_last_modified/1]). +% @todo -export([parse_link/1]). RFC5988 +% @todo -export([parse_location/1]). RFC7231 +-export([parse_max_forwards/1]). 
+% @todo -export([parse_memento_datetime/1]). RFC7089 +% @todo -export([parse_negotiate/1]). RFC2295 +% @todo -export([parse_origin/1]). CORS, RFC6454 +-export([parse_pragma/1]). +% @todo -export([parse_prefer/1]). RFC7240 +-export([parse_proxy_authenticate/1]). +% @todo -export([parse_proxy_authentication_info/1]). RFC2617 +-export([parse_proxy_authorization/1]). +% @todo -export([parse_proxy_support/1]). RFC4559 +% @todo -export([parse_public_key_pins/1]). Key Pinning (upcoming) +% @todo -export([parse_public_key_pins_report_only/1]). Key Pinning (upcoming) +-export([parse_range/1]). +% @todo -export([parse_referer/1]). RFC7231 +% @todo -export([parse_refresh/1]). Non-standard (examples: "5", "5; url=http://example.com/") +-export([parse_retry_after/1]). +-export([parse_sec_websocket_accept/1]). +-export([parse_sec_websocket_extensions/1]). +-export([parse_sec_websocket_key/1]). +% @todo -export([parse_sec_websocket_origin/1]). Websocket drafts 7 and 8 +-export([parse_sec_websocket_protocol_req/1]). +-export([parse_sec_websocket_protocol_resp/1]). +-export([parse_sec_websocket_version_req/1]). +-export([parse_sec_websocket_version_resp/1]). +% @todo -export([parse_server/1]). RFC7231 +% @todo -export([parse_set_cookie/1]). RFC6265 +% @todo -export([parse_strict_transport_security/1]). RFC6797 +% @todo -export([parse_tcn/1]). RFC2295 +-export([parse_te/1]). +-export([parse_trailer/1]). +-export([parse_transfer_encoding/1]). +-export([parse_upgrade/1]). +% @todo -export([parse_user_agent/1]). RFC7231 +% @todo -export([parse_variant_vary/1]). RFC2295 +-export([parse_vary/1]). +% @todo -export([parse_via/1]). RFC7230 +% @todo -export([parse_want_digest/1]). RFC3230 +% @todo -export([parse_warning/1]). RFC7234 +-export([parse_www_authenticate/1]). +% @todo -export([parse_x_content_duration/1]). Gecko/MDN (value: float) +% @todo -export([parse_x_dns_prefetch_control/1]). Various (value: "on"|"off") +-export([parse_x_forwarded_for/1]). 
% @todo -export([parse_x_frame_options/1]). RFC7034

%% Entity tag: weak/strong flag plus the opaque tag.
-type etag() :: {weak | strong, binary()}.
-export_type([etag/0]).

%% Media type as {Type, SubType, Parameters}.
-type media_type() :: {binary(), binary(), [{binary(), binary()}]}.
-export_type([media_type/0]).

%% Quality value stored as an integer in thousandths (1000 means q=1).
-type qvalue() :: 0..1000.
-export_type([qvalue/0]).

-type websocket_version() :: 0..255.
-export_type([websocket_version/0]).

-include("cow_inline.hrl").
-include("cow_parse.hrl").

-ifdef(TEST).
-include_lib("triq/include/triq.hrl").

%% Generators shared by the property tests of this module.

%% List of Dom values whose length is chosen between Min and Max.
vector(Min, Max, Dom) -> ?LET(N, choose(Min, Max), vector(N, Dom)).
small_list(Dom) -> vector(0, 10, Dom).
small_non_empty_list(Dom) -> vector(1, 10, Dom).

alpha_chars() -> "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".
alphanum_chars() -> "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".
digit_chars() -> "0123456789".

%% Optional whitespace: any number of spaces and tabs.
ows() -> list(elements([$\s, $\t])).
alpha() -> elements(alpha_chars()).
alphanum() -> elements(alphanum_chars()).
digit() -> elements(digit_chars()).

%% Token character; alphanumerics are weighted 99 to 1 against punctuation.
tchar() ->
	frequency([
		{1, elements([$!, $#, $$, $%, $&, $', $*, $+, $-, $., $^, $_, $`, $|, $~])},
		{99, elements(alphanum_chars())}
	]).

%% Non-empty token, returned as a binary.
token() ->
	?LET(T,
		non_empty(list(tchar())),
		list_to_binary(T)).

abnf_char() ->
	int(1, 127).

vchar() ->
	int(33, 126).

obs_text() ->
	int(128, 255).

%% Character allowed unescaped inside a quoted string
%% (the list excludes the double quote and the backslash).
qdtext() ->
	frequency([
		{99, elements("\t\s!#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`abcdefghijklmnopqrstuvwxyz{|}~")},
		{1, obs_text()}
	]).

%% Backslash followed by the escaped character, as a two-element list.
quoted_pair() ->
	[$\\, frequency([
		{99, elements("\t\s!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~")},
		{1, obs_text()}
	])].

%% Quoted string as the iolist [$", Chars, $"]; Chars mixes plain
%% characters and quoted_pair() lists.
quoted_string() ->
	[$", list(frequency([{100, qdtext()}, {1, quoted_pair()}])), $"].

%% Helper function for ( token / quoted-string ) values.
%% A generated quoted string is reduced to its unescaped binary
%% content; any other value (a token) is returned unchanged.
unquote([$", V, $"]) -> unquote(V, <<>>);
unquote(V) -> V.
%% Accumulate the content of a generated quoted string into a binary,
%% dropping the backslash of each quoted pair.
unquote([], Acc) -> Acc;
unquote([[$\\, C]|Tail], Acc) -> unquote(Tail, << Acc/binary, C >>);
unquote([C|Tail], Acc) -> unquote(Tail, << Acc/binary, C >>).

%% Generator for {Key, Value, OWS1, OWS2} parameters. The key <<"q">>
%% is excluded because the parser treats "q=" as the weight.
parameter() ->
	?SUCHTHAT({K, _, _, _},
		{token(), oneof([token(), quoted_string()]), ows(), ows()},
		K =/= <<"q">>).

%% Generator for a qvalue in thousandths, or undefined (no weight) 10% of the time.
weight() ->
	frequency([
		{90, int(0, 1000)},
		{10, undefined}
	]).

%% Helper function for weight's qvalue formatting.
%% Converts thousandths to the textual form, zero-padded to three decimals.
qvalue_to_iodata(0) -> <<"0">>;
qvalue_to_iodata(Q) when Q < 10 -> [<<"0.00">>, integer_to_binary(Q)];
qvalue_to_iodata(Q) when Q < 100 -> [<<"0.0">>, integer_to_binary(Q)];
qvalue_to_iodata(Q) when Q < 1000 -> [<<"0.">>, integer_to_binary(Q)];
qvalue_to_iodata(1000) -> <<"1">>.
-endif.

%% @doc Parse the Accept header.
%%
%% Returns one {MediaType, Quality, Extensions} tuple per media range,
%% in header order. Quality defaults to 1000 when no weight is given.
%% Invalid input has no matching clause and therefore crashes.

-spec parse_accept(binary()) -> [{media_type(), qvalue(), [binary() | {binary(), binary()}]}].
%% Shortcut for the exact value "*/*".
parse_accept(<<"*/*">>) ->
	[{{<<"*">>, <<"*">>, []}, 1000, []}];
parse_accept(Accept) ->
	media_range_list(Accept, []).

%% Start of a list element: skip whitespace and commas, or begin a type.
%% ?LOWER comes from cow_inline.hrl (not visible here); the tests in this
%% module expect the accumulated names to come back lowercased.
media_range_list(<< C, R/bits >>, Acc) when ?IS_TOKEN(C) -> ?LOWER(media_range_type, R, Acc, <<>>);
media_range_list(<< C, R/bits >>, Acc) when ?IS_WS_COMMA(C) -> media_range_list(R, Acc);
media_range_list(<<>>, Acc) -> lists:reverse(Acc).

%% Accumulate the type T until the "/" separator. The subtype must
%% start with a token character.
media_range_type(<< C, R/bits >>, Acc, T) when ?IS_TOKEN(C) -> ?LOWER(media_range_type, R, Acc, T);
media_range_type(<< $/, C, R/bits >>, Acc, T) when ?IS_TOKEN(C) -> ?LOWER(media_range_subtype, R, Acc, T, <<>>);
%% Special clause for badly behaving user agents that send * instead of */*.
media_range_type(<< $;, R/bits >>, Acc, <<"*">>) -> media_range_before_param(R, Acc, <<"*">>, <<"*">>, []).

%% Accumulate the subtype S, then continue with the parameters.
media_range_subtype(<< C, R/bits >>, Acc, T, S) when ?IS_TOKEN(C) -> ?LOWER(media_range_subtype, R, Acc, T, S);
media_range_subtype(R, Acc, T, S) -> media_range_param_sep(R, Acc, T, S, []).
%% After a media range or one of its parameters. T is the type, S the
%% subtype, P the parameters collected so far in reverse order.
%% End of input or "," emits the range with the default quality 1000
%% and no extensions; ";" introduces another parameter or the weight.
media_range_param_sep(<<>>, Acc, T, S, P) -> lists:reverse([{{T, S, lists:reverse(P)}, 1000, []}|Acc]);
media_range_param_sep(<< $,, R/bits >>, Acc, T, S, P) -> media_range_list(R, [{{T, S, lists:reverse(P)}, 1000, []}|Acc]);
media_range_param_sep(<< $;, R/bits >>, Acc, T, S, P) -> media_range_before_param(R, Acc, T, S, P);
media_range_param_sep(<< C, R/bits >>, Acc, T, S, P) when ?IS_WS(C) -> media_range_param_sep(R, Acc, T, S, P).

%% After ";": skip whitespace, then either the weight (a literal
%% lowercase "q=") or the first character of a parameter name.
media_range_before_param(<< C, R/bits >>, Acc, T, S, P) when ?IS_WS(C) -> media_range_before_param(R, Acc, T, S, P);
media_range_before_param(<< $q, $=, R/bits >>, Acc, T, S, P) -> media_range_weight(R, Acc, T, S, P);
media_range_before_param(<< C, R/bits >>, Acc, T, S, P) when ?IS_TOKEN(C) -> ?LOWER(media_range_param, R, Acc, T, S, P, <<>>).

%% Accumulate the parameter name K until "=", then dispatch on whether
%% the value is a quoted string or a token. No whitespace is accepted
%% around "=".
media_range_param(<< $=, $", R/bits >>, Acc, T, S, P, K) -> media_range_quoted(R, Acc, T, S, P, K, <<>>);
media_range_param(<< $=, C, R/bits >>, Acc, T, S, P, K) when ?IS_TOKEN(C) -> media_range_value(R, Acc, T, S, P, K, << C >>);
media_range_param(<< C, R/bits >>, Acc, T, S, P, K) when ?IS_TOKEN(C) -> ?LOWER(media_range_param, R, Acc, T, S, P, K).

%% Quoted-string value: the closing quote ends it, a backslash escapes
%% the next character (the backslash is dropped), anything else is
%% copied as is. The value keeps its case.
media_range_quoted(<< $", R/bits >>, Acc, T, S, P, K, V) -> media_range_param_sep(R, Acc, T, S, [{K, V}|P]);
media_range_quoted(<< $\\, C, R/bits >>, Acc, T, S, P, K, V) when ?IS_VCHAR_OBS(C) -> media_range_quoted(R, Acc, T, S, P, K, << V/binary, C >>);
media_range_quoted(<< C, R/bits >>, Acc, T, S, P, K, V) when ?IS_VCHAR_OBS(C) -> media_range_quoted(R, Acc, T, S, P, K, << V/binary, C >>).

%% Token value: accumulate token characters (case preserved), then
%% store the {K, V} pair and go back to the separator state.
media_range_value(<< C, R/bits >>, Acc, T, S, P, K, V) when ?IS_TOKEN(C) -> media_range_value(R, Acc, T, S, P, K, << V/binary, C >>);
media_range_value(R, Acc, T, S, P, K, V) -> media_range_param_sep(R, Acc, T, S, [{K, V}|P]).
%% Parse the weight following "q=" and convert it to thousandths.
%% Clause order matters: longer literal prefixes must be tried before
%% their shorter forms ("1.000" before "1.00" ... before "1", and three
%% digits before two before one). Whatever follows the weight is handed
%% to accept_ext_sep/7 with an empty extension list.
media_range_weight(<< "1.000", R/bits >>, Acc, T, S, P) -> accept_ext_sep(R, Acc, T, S, P, 1000, []);
media_range_weight(<< "1.00", R/bits >>, Acc, T, S, P) -> accept_ext_sep(R, Acc, T, S, P, 1000, []);
media_range_weight(<< "1.0", R/bits >>, Acc, T, S, P) -> accept_ext_sep(R, Acc, T, S, P, 1000, []);
media_range_weight(<< "1.", R/bits >>, Acc, T, S, P) -> accept_ext_sep(R, Acc, T, S, P, 1000, []);
media_range_weight(<< "1", R/bits >>, Acc, T, S, P) -> accept_ext_sep(R, Acc, T, S, P, 1000, []);
media_range_weight(<< "0.", A, B, C, R/bits >>, Acc, T, S, P) when ?IS_DIGIT(A), ?IS_DIGIT(B), ?IS_DIGIT(C) ->
	accept_ext_sep(R, Acc, T, S, P, (A - $0) * 100 + (B - $0) * 10 + (C - $0), []);
media_range_weight(<< "0.", A, B, R/bits >>, Acc, T, S, P) when ?IS_DIGIT(A), ?IS_DIGIT(B) ->
	accept_ext_sep(R, Acc, T, S, P, (A - $0) * 100 + (B - $0) * 10, []);
media_range_weight(<< "0.", A, R/bits >>, Acc, T, S, P) when ?IS_DIGIT(A) ->
	accept_ext_sep(R, Acc, T, S, P, (A - $0) * 100, []);
media_range_weight(<< "0.", R/bits >>, Acc, T, S, P) -> accept_ext_sep(R, Acc, T, S, P, 0, []);
media_range_weight(<< "0", R/bits >>, Acc, T, S, P) -> accept_ext_sep(R, Acc, T, S, P, 0, []);
%% Special clauses for badly behaving user agents that send .123 instead of 0.123.
media_range_weight(<< ".", A, B, C, R/bits >>, Acc, T, S, P) when ?IS_DIGIT(A), ?IS_DIGIT(B), ?IS_DIGIT(C) ->
	accept_ext_sep(R, Acc, T, S, P, (A - $0) * 100 + (B - $0) * 10 + (C - $0), []);
media_range_weight(<< ".", A, B, R/bits >>, Acc, T, S, P) when ?IS_DIGIT(A), ?IS_DIGIT(B) ->
	accept_ext_sep(R, Acc, T, S, P, (A - $0) * 100 + (B - $0) * 10, []);
media_range_weight(<< ".", A, R/bits >>, Acc, T, S, P) when ?IS_DIGIT(A) ->
	accept_ext_sep(R, Acc, T, S, P, (A - $0) * 100, []).
%% After the weight or an accept extension. Q is the quality and E the
%% extensions collected so far in reverse order. End of input or ","
%% emits the media range; ";" introduces another extension.
accept_ext_sep(<<>>, Acc, T, S, P, Q, E) -> lists:reverse([{{T, S, lists:reverse(P)}, Q, lists:reverse(E)}|Acc]);
accept_ext_sep(<< $,, R/bits >>, Acc, T, S, P, Q, E) -> media_range_list(R, [{{T, S, lists:reverse(P)}, Q, lists:reverse(E)}|Acc]);
accept_ext_sep(<< $;, R/bits >>, Acc, T, S, P, Q, E) -> accept_before_ext(R, Acc, T, S, P, Q, E);
accept_ext_sep(<< C, R/bits >>, Acc, T, S, P, Q, E) when ?IS_WS(C) -> accept_ext_sep(R, Acc, T, S, P, Q, E).

%% After ";": skip whitespace, then start an extension name.
accept_before_ext(<< C, R/bits >>, Acc, T, S, P, Q, E) when ?IS_WS(C) -> accept_before_ext(R, Acc, T, S, P, Q, E);
accept_before_ext(<< C, R/bits >>, Acc, T, S, P, Q, E) when ?IS_TOKEN(C) -> ?LOWER(accept_ext, R, Acc, T, S, P, Q, E, <<>>).

%% Accumulate the extension name K. An extension without "=" is stored
%% as the bare name (last clause); otherwise the value is a quoted
%% string or a token.
accept_ext(<< $=, $", R/bits >>, Acc, T, S, P, Q, E, K) -> accept_quoted(R, Acc, T, S, P, Q, E, K, <<>>);
accept_ext(<< $=, C, R/bits >>, Acc, T, S, P, Q, E, K) when ?IS_TOKEN(C) -> accept_value(R, Acc, T, S, P, Q, E, K, << C >>);
accept_ext(<< C, R/bits >>, Acc, T, S, P, Q, E, K) when ?IS_TOKEN(C) -> ?LOWER(accept_ext, R, Acc, T, S, P, Q, E, K);
accept_ext(R, Acc, T, S, P, Q, E, K) -> accept_ext_sep(R, Acc, T, S, P, Q, [K|E]).

%% Quoted-string extension value; a backslash escapes the next
%% character and is itself dropped.
accept_quoted(<< $", R/bits >>, Acc, T, S, P, Q, E, K, V) -> accept_ext_sep(R, Acc, T, S, P, Q, [{K, V}|E]);
accept_quoted(<< $\\, C, R/bits >>, Acc, T, S, P, Q, E, K, V) when ?IS_VCHAR_OBS(C) -> accept_quoted(R, Acc, T, S, P, Q, E, K, << V/binary, C >>);
accept_quoted(<< C, R/bits >>, Acc, T, S, P, Q, E, K, V) when ?IS_VCHAR_OBS(C) -> accept_quoted(R, Acc, T, S, P, Q, E, K, << V/binary, C >>).

%% Token extension value (case preserved).
accept_value(<< C, R/bits >>, Acc, T, S, P, Q, E, K, V) when ?IS_TOKEN(C) -> accept_value(R, Acc, T, S, P, Q, E, K, << V/binary, C >>);
accept_value(R, Acc, T, S, P, Q, E, K, V) -> accept_ext_sep(R, Acc, T, S, P, Q, [{K, V}|E]).

-ifdef(TEST).
%% Generator for one accept extension: a bare token or a parameter() tuple.
accept_ext() ->
	oneof([token(), parameter()]).

%% Generator for the extension list; empty 90% of the time.
accept_params() ->
	frequency([
		{90, []},
		{10, small_list(accept_ext())}
	]).
%% Generator for one media range. Yields {Type, SubType, Params, Weight,
%% Extensions, Binary} where Binary is the textual form to parse.
%% Extensions are only written out when a weight is present, because
%% they can only follow ";q=".
accept() ->
	?LET({T, S, P, W, E},
		{token(), token(), small_list(parameter()), weight(), accept_params()},
		{T, S, P, W, E, iolist_to_binary([T, $/, S,
			[[OWS1, $;, OWS2, K, $=, V] || {K, V, OWS1, OWS2} <- P],
			case W of
				undefined -> [];
				_ -> [
					[<<";q=">>, qvalue_to_iodata(W)],
					[case Ext of
						{K, V, OWS1, OWS2} -> [OWS1, $;, OWS2, K, $=, V];
						K -> [$;, K]
					end || Ext <- E]]
			end])}
	).

%% Property: parsing a comma-separated list of generated media ranges
%% returns, element by element, the lowercased type, subtype and keys,
%% the unquoted values, and the weight (1000 when none was generated).
prop_parse_accept() ->
	?FORALL(L,
		vector(1, 50, accept()),
		begin
			%% Every element is prefixed with ","; the match drops the leading one.
			<< _, Accept/binary >> = iolist_to_binary([[$,, A] || {_, _, _, _, _, A} <- L]),
			ResL = parse_accept(Accept),
			CheckedL = [begin
				ExpectedP = [{?LOWER(K), unquote(V)} || {K, V, _, _} <- P],
				ExpectedE = [case Ext of
					{K, V, _, _} -> {?LOWER(K), unquote(V)};
					K -> ?LOWER(K)
				end || Ext <- E],
				ResT =:= ?LOWER(T)
					andalso ResS =:= ?LOWER(S)
					andalso ResP =:= ExpectedP
					andalso (ResW =:= W orelse (W =:= undefined andalso ResW =:= 1000))
					andalso ((W =:= undefined andalso ResE =:= []) orelse (W =/= undefined andalso ResE =:= ExpectedE))
			end || {{T, S, P, W, E, _}, {{ResT, ResS, ResP}, ResW, ResE}} <- lists:zip(L, ResL)],
			%% True only when every comparison above returned true.
			[true] =:= lists:usort(CheckedL)
		end
	).
%% Table-driven tests: each entry is {HeaderValue, ExpectedResult}.
%% The last entry covers the lenient forms accepted by the parser
%% ("*" instead of "*/*" and ".2" instead of "0.2").
parse_accept_test_() ->
	Tests = [
		{<<>>, []},
		{<<"   ">>, []},
		{<<"audio/*; q=0.2, audio/basic">>, [
			{{<<"audio">>, <<"*">>, []}, 200, []},
			{{<<"audio">>, <<"basic">>, []}, 1000, []}
		]},
		{<<"text/plain; q=0.5, text/html, "
			"text/x-dvi; q=0.8, text/x-c">>, [
			{{<<"text">>, <<"plain">>, []}, 500, []},
			{{<<"text">>, <<"html">>, []}, 1000, []},
			{{<<"text">>, <<"x-dvi">>, []}, 800, []},
			{{<<"text">>, <<"x-c">>, []}, 1000, []}
		]},
		{<<"text/*, text/html, text/html;level=1, */*">>, [
			{{<<"text">>, <<"*">>, []}, 1000, []},
			{{<<"text">>, <<"html">>, []}, 1000, []},
			{{<<"text">>, <<"html">>, [{<<"level">>, <<"1">>}]}, 1000, []},
			{{<<"*">>, <<"*">>, []}, 1000, []}
		]},
		{<<"text/*;q=0.3, text/html;q=0.7, text/html;level=1, "
			"text/html;level=2;q=0.4, */*;q=0.5">>, [
			{{<<"text">>, <<"*">>, []}, 300, []},
			{{<<"text">>, <<"html">>, []}, 700, []},
			{{<<"text">>, <<"html">>, [{<<"level">>, <<"1">>}]}, 1000, []},
			{{<<"text">>, <<"html">>, [{<<"level">>, <<"2">>}]}, 400, []},
			{{<<"*">>, <<"*">>, []}, 500, []}
		]},
		{<<"text/html;level=1;quoted=\"hi hi hi\";"
			"q=0.123;standalone;complex=gits, text/plain">>, [
			{{<<"text">>, <<"html">>,
				[{<<"level">>, <<"1">>}, {<<"quoted">>, <<"hi hi hi">>}]}, 123,
				[<<"standalone">>, {<<"complex">>, <<"gits">>}]},
			{{<<"text">>, <<"plain">>, []}, 1000, []}
		]},
		{<<"text/html, image/gif, image/jpeg, *; q=.2, */*; q=.2">>, [
			{{<<"text">>, <<"html">>, []}, 1000, []},
			{{<<"image">>, <<"gif">>, []}, 1000, []},
			{{<<"image">>, <<"jpeg">>, []}, 1000, []},
			{{<<"*">>, <<"*">>, []}, 200, []},
			{{<<"*">>, <<"*">>, []}, 200, []}
		]}
	],
	[{V, fun() -> R = parse_accept(V) end} || {V, R} <- Tests].

%% Values that must make parse_accept/1 crash: missing subtype,
%% whitespace inside the type, and a NUL byte escaped inside a quoted
%% string.
parse_accept_error_test_() ->
	Tests = [
		<<"audio/basic, */;q=0.5">>,
		<<"audio/, audio/basic">>,
		<<"aud\tio/basic">>,
		<<"audio/basic;t=\"zero \\", 0, " woo\"">>
	],
	[{V, fun() -> {'EXIT', _} = (catch parse_accept(V)) end} || V <- Tests].
%% Performance test run through the horse tool: 20000 parses of a
%% typical Accept value.
horse_parse_accept() ->
	horse:repeat(20000,
		parse_accept(<<"text/*;q=0.3, text/html;q=0.7, text/html;level=1, "
			"text/html;level=2;q=0.4, */*;q=0.5">>)
	).
-endif.

%% @doc Parse the Accept-Charset header.
%%
%% Returns {Charset, Quality} pairs in header order. nonempty/1 is
%% defined outside this excerpt; the error tests of this module expect
%% an empty header value to crash.

-spec parse_accept_charset(binary()) -> [{binary(), qvalue()}].
parse_accept_charset(Charset) ->
	nonempty(conneg_list(Charset, [])).

%% Generic "token with optional weight" list parser, shared by
%% Accept-Charset and Accept-Encoding.
%% Start of an element: skip whitespace and commas, or begin a token.
conneg_list(<<>>, Acc) -> lists:reverse(Acc);
conneg_list(<< C, R/bits >>, Acc) when ?IS_WS_COMMA(C) -> conneg_list(R, Acc);
conneg_list(<< C, R/bits >>, Acc) when ?IS_TOKEN(C) -> ?LOWER(conneg, R, Acc, <<>>).

%% Accumulate the token T.
conneg(<< C, R/bits >>, Acc, T) when ?IS_TOKEN(C) -> ?LOWER(conneg, R, Acc, T);
conneg(R, Acc, T) -> conneg_param_sep(R, Acc, T).

%% After the token: end of input or "," emits it with the default
%% quality 1000; ";" introduces the weight.
conneg_param_sep(<<>>, Acc, T) -> lists:reverse([{T, 1000}|Acc]);
conneg_param_sep(<< $,, R/bits >>, Acc, T) -> conneg_list(R, [{T, 1000}|Acc]);
conneg_param_sep(<< $;, R/bits >>, Acc, T) -> conneg_before_weight(R, Acc, T);
conneg_param_sep(<< C, R/bits >>, Acc, T) when ?IS_WS(C) -> conneg_param_sep(R, Acc, T).

%% After ";": skip whitespace and expect a literal lowercase "q=".
conneg_before_weight(<< C, R/bits >>, Acc, T) when ?IS_WS(C) -> conneg_before_weight(R, Acc, T);
conneg_before_weight(<< $q, $=, R/bits >>, Acc, T) -> conneg_weight(R, Acc, T);
%% Special clause for broken user agents that confuse ; and , separators.
%% The previous token is emitted with quality 1000 and a new one begins.
conneg_before_weight(<< C, R/bits >>, Acc, T) when ?IS_TOKEN(C) -> ?LOWER(conneg, R, [{T, 1000}|Acc], <<>>).
%% Parse the weight following "q=" and convert it to thousandths.
%% Longer literal prefixes are listed first so they win over their
%% shorter forms. Unlike media_range_weight/5 there is no clause for
%% the ".123" form.
conneg_weight(<< "1.000", R/bits >>, Acc, T) -> conneg_list_sep(R, [{T, 1000}|Acc]);
conneg_weight(<< "1.00", R/bits >>, Acc, T) -> conneg_list_sep(R, [{T, 1000}|Acc]);
conneg_weight(<< "1.0", R/bits >>, Acc, T) -> conneg_list_sep(R, [{T, 1000}|Acc]);
conneg_weight(<< "1.", R/bits >>, Acc, T) -> conneg_list_sep(R, [{T, 1000}|Acc]);
conneg_weight(<< "1", R/bits >>, Acc, T) -> conneg_list_sep(R, [{T, 1000}|Acc]);
conneg_weight(<< "0.", A, B, C, R/bits >>, Acc, T) when ?IS_DIGIT(A), ?IS_DIGIT(B), ?IS_DIGIT(C) ->
	conneg_list_sep(R, [{T, (A - $0) * 100 + (B - $0) * 10 + (C - $0)}|Acc]);
conneg_weight(<< "0.", A, B, R/bits >>, Acc, T) when ?IS_DIGIT(A), ?IS_DIGIT(B) ->
	conneg_list_sep(R, [{T, (A - $0) * 100 + (B - $0) * 10}|Acc]);
conneg_weight(<< "0.", A, R/bits >>, Acc, T) when ?IS_DIGIT(A) ->
	conneg_list_sep(R, [{T, (A - $0) * 100}|Acc]);
conneg_weight(<< "0.", R/bits >>, Acc, T) -> conneg_list_sep(R, [{T, 0}|Acc]);
conneg_weight(<< "0", R/bits >>, Acc, T) -> conneg_list_sep(R, [{T, 0}|Acc]).

%% After a weighted element: only whitespace, "," or end of input may follow.
conneg_list_sep(<<>>, Acc) -> lists:reverse(Acc);
conneg_list_sep(<< C, R/bits >>, Acc) when ?IS_WS(C) -> conneg_list_sep(R, Acc);
conneg_list_sep(<< $,, R/bits >>, Acc) -> conneg_list(R, Acc).

-ifdef(TEST).
%% Generator yielding {Charset, Weight, Binary}, where Binary is the
%% textual form with the optional ";q=" suffix.
accept_charset() ->
	?LET({C, W},
		{token(), weight()},
		{C, W, iolist_to_binary([C, case W of
			undefined -> [];
			_ -> [<<";q=">>, qvalue_to_iodata(W)]
		end])}
	).

%% Property: each parsed element is the lowercased charset with its
%% weight (1000 when none was generated).
prop_parse_accept_charset() ->
	?FORALL(L,
		non_empty(list(accept_charset())),
		begin
			%% Every element is prefixed with ","; the match drops the leading one.
			<< _, AcceptCharset/binary >> = iolist_to_binary([[$,, A] || {_, _, A} <- L]),
			ResL = parse_accept_charset(AcceptCharset),
			CheckedL = [begin
				ResC =:= ?LOWER(Ch)
					andalso (ResW =:= W orelse (W =:= undefined andalso ResW =:= 1000))
			end || {{Ch, W, _}, {ResC, ResW}} <- lists:zip(L, ResL)],
			[true] =:= lists:usort(CheckedL)
		end).
%% Table-driven tests: each entry is {HeaderValue, ExpectedResult}.
parse_accept_charset_test_() ->
	Tests = [
		{<<"iso-8859-5, unicode-1-1;q=0.8">>, [
			{<<"iso-8859-5">>, 1000},
			{<<"unicode-1-1">>, 800}
		]},
		%% Some user agents send this invalid value for the Accept-Charset header
		{<<"ISO-8859-1;utf-8;q=0.7,*;q=0.7">>, [
			{<<"iso-8859-1">>, 1000},
			{<<"utf-8">>, 700},
			{<<"*">>, 700}
		]}
	],
	[{V, fun() -> R = parse_accept_charset(V) end} || {V, R} <- Tests].

%% An empty Accept-Charset value must crash.
parse_accept_charset_error_test_() ->
	Tests = [
		<<>>
	],
	[{V, fun() -> {'EXIT', _} = (catch parse_accept_charset(V)) end} || V <- Tests].

%% Performance test run through the horse tool.
horse_parse_accept_charset() ->
	horse:repeat(20000,
		parse_accept_charset(<<"iso-8859-5, unicode-1-1;q=0.8">>)
	).
-endif.

%% @doc Parse the Accept-Encoding header.
%%
%% Uses the same list parser as Accept-Charset, but without the
%% nonempty/1 check: an empty value returns [].

-spec parse_accept_encoding(binary()) -> [{binary(), qvalue()}].
parse_accept_encoding(Encoding) ->
	conneg_list(Encoding, []).

-ifdef(TEST).
%% Generator yielding {Encoding, Weight, Binary}, where Binary is the
%% textual form with the optional ";q=" suffix.
accept_encoding() ->
	?LET({E, W},
		{token(), weight()},
		{E, W, iolist_to_binary([E, case W of
			undefined -> [];
			_ -> [<<";q=">>, qvalue_to_iodata(W)]
		end])}
	).

%% @todo This property seems useless, see prop_accept_charset.
prop_parse_accept_encoding() ->
	?FORALL(L,
		non_empty(list(accept_encoding())),
		begin
			%% Every element is prefixed with ","; the match drops the leading one.
			<< _, AcceptEncoding/binary >> = iolist_to_binary([[$,, A] || {_, _, A} <- L]),
			ResL = parse_accept_encoding(AcceptEncoding),
			CheckedL = [begin
				ResE =:= ?LOWER(E)
					andalso (ResW =:= W orelse (W =:= undefined andalso ResW =:= 1000))
			end || {{E, W, _}, {ResE, ResW}} <- lists:zip(L, ResL)],
			[true] =:= lists:usort(CheckedL)
		end).
%% Table-driven tests: each entry is {HeaderValue, ExpectedResult}.
%% An empty Accept-Encoding value is valid and yields [].
parse_accept_encoding_test_() ->
	Tests = [
		{<<>>, []},
		{<<"*">>, [{<<"*">>, 1000}]},
		{<<"compress, gzip">>, [
			{<<"compress">>, 1000},
			{<<"gzip">>, 1000}
		]},
		{<<"compress;q=0.5, gzip;q=1.0">>, [
			{<<"compress">>, 500},
			{<<"gzip">>, 1000}
		]},
		{<<"gzip;q=1.0, identity; q=0.5, *;q=0">>, [
			{<<"gzip">>, 1000},
			{<<"identity">>, 500},
			{<<"*">>, 0}
		]}
	],
	[{V, fun() -> R = parse_accept_encoding(V) end} || {V, R} <- Tests].

%% Performance test run through the horse tool.
horse_parse_accept_encoding() ->
	horse:repeat(20000,
		parse_accept_encoding(<<"gzip;q=1.0, identity; q=0.5, *;q=0">>)
	).
-endif.

%% @doc Parse the Accept-Language header.
%%
%% Returns {LanguageRange, Quality} pairs in header order, with the
%% range lowercased and the quality in thousandths (1000 when absent).
%% Invalid input has no matching clause and therefore crashes.

-spec parse_accept_language(binary()) -> [{binary(), qvalue()}].
parse_accept_language(LanguageRange) ->
	nonempty(language_range_list(LanguageRange, [])).

%% Start of an element: skip whitespace and commas, accept the "*"
%% wildcard, or begin a primary tag, which must start with a letter.
language_range_list(<<>>, Acc) -> lists:reverse(Acc);
language_range_list(<< C, R/bits >>, Acc) when ?IS_WS_COMMA(C) -> language_range_list(R, Acc);
language_range_list(<< $*, R/bits >>, Acc) -> language_range_param_sep(R, Acc, <<"*">>);
language_range_list(<< C, R/bits >>, Acc) when ?IS_ALPHA(C) ->
	?LOWER(language_range, R, Acc, 1, <<>>).

%% Primary tag: letters only, at most 8 of them. N counts the
%% characters consumed so far in the current tag.
language_range(<< $-, C, R/bits >>, Acc, _, T) when ?IS_ALPHANUM(C) ->
	?LOWER(language_range_sub, R, Acc, 1, << T/binary, $- >>);
language_range(<< C, R/bits >>, Acc, N, T) when ?IS_ALPHA(C), N < 8 ->
	?LOWER(language_range, R, Acc, N + 1, T);
language_range(R, Acc, _, T) -> language_range_param_sep(R, Acc, T).

%% Subtags: 1 to 8 alphanumerics each, separated by "-".
%%
%% A hyphen must be followed by at least one alphanumeric, exactly as
%% in language_range/4 above. The previous clause consumed a lone "-"
%% and restarted the counter at 0, which let empty subtags through
%% ("en-us-" or "en-us--x" were accepted). Such values now fall through
%% to language_range_param_sep/3, which has no clause for "-" and
%% crashes like any other invalid input.
language_range_sub(<< $-, C, R/bits >>, Acc, _, T) when ?IS_ALPHANUM(C) ->
	?LOWER(language_range_sub, R, Acc, 1, << T/binary, $- >>);
language_range_sub(<< C, R/bits >>, Acc, N, T) when ?IS_ALPHANUM(C), N < 8 ->
	?LOWER(language_range_sub, R, Acc, N + 1, T);
language_range_sub(R, Acc, _, T) -> language_range_param_sep(R, Acc, T).
%% After a language range T: end of input or "," emits it with the
%% default quality 1000; ";" introduces the weight.
language_range_param_sep(<<>>, Acc, T) -> lists:reverse([{T, 1000}|Acc]);
language_range_param_sep(<< $,, R/bits >>, Acc, T) -> language_range_list(R, [{T, 1000}|Acc]);
language_range_param_sep(<< $;, R/bits >>, Acc, T) -> language_range_before_weight(R, Acc, T);
language_range_param_sep(<< C, R/bits >>, Acc, T) when ?IS_WS(C) -> language_range_param_sep(R, Acc, T).

%% After ";": skip whitespace and expect a literal lowercase "q=".
language_range_before_weight(<< C, R/bits >>, Acc, T) when ?IS_WS(C) -> language_range_before_weight(R, Acc, T);
language_range_before_weight(<< $q, $=, R/bits >>, Acc, T) -> language_range_weight(R, Acc, T);
%% Special clause for broken user agents that confuse ; and , separators.
%% The previous range is emitted with quality 1000 and a new one begins.
language_range_before_weight(<< C, R/bits >>, Acc, T) when ?IS_ALPHA(C) ->
	?LOWER(language_range, R, [{T, 1000}|Acc], 1, <<>>).

%% Parse the weight following "q=" and convert it to thousandths.
%% Longer literal prefixes are listed first so they win over their
%% shorter forms.
language_range_weight(<< "1.000", R/bits >>, Acc, T) -> language_range_list_sep(R, [{T, 1000}|Acc]);
language_range_weight(<< "1.00", R/bits >>, Acc, T) -> language_range_list_sep(R, [{T, 1000}|Acc]);
language_range_weight(<< "1.0", R/bits >>, Acc, T) -> language_range_list_sep(R, [{T, 1000}|Acc]);
language_range_weight(<< "1.", R/bits >>, Acc, T) -> language_range_list_sep(R, [{T, 1000}|Acc]);
language_range_weight(<< "1", R/bits >>, Acc, T) -> language_range_list_sep(R, [{T, 1000}|Acc]);
language_range_weight(<< "0.", A, B, C, R/bits >>, Acc, T) when ?IS_DIGIT(A), ?IS_DIGIT(B), ?IS_DIGIT(C) ->
	language_range_list_sep(R, [{T, (A - $0) * 100 + (B - $0) * 10 + (C - $0)}|Acc]);
language_range_weight(<< "0.", A, B, R/bits >>, Acc, T) when ?IS_DIGIT(A), ?IS_DIGIT(B) ->
	language_range_list_sep(R, [{T, (A - $0) * 100 + (B - $0) * 10}|Acc]);
language_range_weight(<< "0.", A, R/bits >>, Acc, T) when ?IS_DIGIT(A) ->
	language_range_list_sep(R, [{T, (A - $0) * 100}|Acc]);
language_range_weight(<< "0.", R/bits >>, Acc, T) -> language_range_list_sep(R, [{T, 0}|Acc]);
language_range_weight(<< "0", R/bits >>, Acc, T) -> language_range_list_sep(R, [{T, 0}|Acc]).
%% After a weighted element: only whitespace, "," or end of input may follow.
language_range_list_sep(<<>>, Acc) -> lists:reverse(Acc);
language_range_list_sep(<< C, R/bits >>, Acc) when ?IS_WS(C) -> language_range_list_sep(R, Acc);
language_range_list_sep(<< $,, R/bits >>, Acc) -> language_range_list(R, Acc).

-ifdef(TEST).
%% Generator for a primary tag: 1 to 8 letters.
language_range_tag() ->
	vector(1, 8, alpha()).

%% Generator for a subtag: "-" followed by 1 to 8 alphanumerics.
language_range_subtag() ->
	[$-, vector(1, 8, alphanum())].

%% Generator for a language range: a primary tag and up to 10 subtags.
language_range() ->
	[language_range_tag(), small_list(language_range_subtag())].

%% Generator yielding {Range, Weight, Binary}, where Binary is the
%% textual form with the optional ";q=" suffix.
accept_language() ->
	?LET({R, W},
		{language_range(), weight()},
		{iolist_to_binary(R), W, iolist_to_binary([R, case W of
			undefined -> [];
			_ -> [<<";q=">>, qvalue_to_iodata(W)]
		end])}
	).

%% Property: each parsed element is the lowercased range with its
%% weight (1000 when none was generated).
prop_parse_accept_language() ->
	?FORALL(L,
		non_empty(list(accept_language())),
		begin
			%% Every element is prefixed with ","; the match drops the leading one.
			<< _, AcceptLanguage/binary >> = iolist_to_binary([[$,, A] || {_, _, A} <- L]),
			ResL = parse_accept_language(AcceptLanguage),
			CheckedL = [begin
				ResR =:= ?LOWER(R)
					andalso (ResW =:= W orelse (W =:= undefined andalso ResW =:= 1000))
			end || {{R, W, _}, {ResR, ResW}} <- lists:zip(L, ResL)],
			[true] =:= lists:usort(CheckedL)
		end).

%% Table-driven tests: each entry is {HeaderValue, ExpectedResult}.
parse_accept_language_test_() ->
	Tests = [
		{<<"da, en-gb;q=0.8, en;q=0.7">>, [
			{<<"da">>, 1000},
			{<<"en-gb">>, 800},
			{<<"en">>, 700}
		]},
		{<<"en, en-US, en-cockney, i-cherokee, x-pig-latin, es-419">>, [
			{<<"en">>, 1000},
			{<<"en-us">>, 1000},
			{<<"en-cockney">>, 1000},
			{<<"i-cherokee">>, 1000},
			{<<"x-pig-latin">>, 1000},
			{<<"es-419">>, 1000}
		]}
	],
	[{V, fun() -> R = parse_accept_language(V) end} || {V, R} <- Tests].

%% Values that must crash: empty header, tags longer than 8 characters,
%% and a primary tag starting with a digit.
parse_accept_language_error_test_() ->
	Tests = [
		<<>>,
		<<"loooooong">>,
		<<"en-us-loooooong">>,
		<<"419-en-us">>
	],
	[{V, fun() -> {'EXIT', _} = (catch parse_accept_language(V)) end} || V <- Tests].

%% Performance test run through the horse tool.
horse_parse_accept_language() ->
	horse:repeat(20000,
		parse_accept_language(<<"da, en-gb;q=0.8, en;q=0.7">>)
	).
-endif.

%% @doc Parse the Accept-Ranges header.

-spec parse_accept_ranges(binary()) -> [binary()].
%% The two exact values "none" and "bytes" are matched directly; any
%% other value goes through the case-insensitive token list parser
%% (token_ci_list/2 and nonempty/1 are defined outside this excerpt).
%% "none" yields the empty list.
parse_accept_ranges(<<"none">>) -> [];
parse_accept_ranges(<<"bytes">>) -> [<<"bytes">>];
parse_accept_ranges(AcceptRanges) ->
	nonempty(token_ci_list(AcceptRanges, [])).

-ifdef(TEST).
%% Table-driven tests: each entry is {HeaderValue, ExpectedResult}.
parse_accept_ranges_test_() ->
	Tests = [
		{<<"bytes">>, [<<"bytes">>]},
		{<<"none">>, []},
		{<<"bytes, pages, kilos">>, [<<"bytes">>, <<"pages">>, <<"kilos">>]}
	],
	[{V, fun() -> R = parse_accept_ranges(V) end} || {V, R} <- Tests].

%% An empty Accept-Ranges value must crash.
parse_accept_ranges_error_test_() ->
	Tests = [
		<<>>
	],
	[{V, fun() -> {'EXIT', _} = (catch parse_accept_ranges(V)) end} || V <- Tests].

%% Performance tests run through the horse tool, one per code path.
horse_parse_accept_ranges_none() ->
	horse:repeat(200000,
		parse_accept_ranges(<<"none">>)
	).

horse_parse_accept_ranges_bytes() ->
	horse:repeat(200000,
		parse_accept_ranges(<<"bytes">>)
	).

horse_parse_accept_ranges_other() ->
	horse:repeat(200000,
		parse_accept_ranges(<<"bytes, pages, kilos">>)
	).
-endif.

%% @doc Parse the Age header.
%%
%% The value is delta-seconds, i.e. one or more digits. Anything else
%% crashes.
%%
%% binary_to_integer/1 accepts a leading sign, so relying on it alone
%% let "+42" and "-0" through. Requiring the first byte to be a digit
%% rejects every signed form; binary_to_integer/1 then rejects any
%% non-digit further in. The result is necessarily non-negative, which
%% makes the former `true = I >= 0' check unnecessary.

-spec parse_age(binary()) -> non_neg_integer().
parse_age(Age = << C, _/bits >>) when ?IS_DIGIT(C) ->
	binary_to_integer(Age).

-ifdef(TEST).
%% Table-driven tests: each entry is {HeaderValue, ExpectedResult}.
parse_age_test_() ->
	Tests = [
		{<<"0">>, 0},
		{<<"42">>, 42},
		{<<"69">>, 69},
		{<<"1337">>, 1337},
		{<<"3495">>, 3495},
		{<<"1234567890">>, 1234567890}
	],
	[{V, fun() -> R = parse_age(V) end} || {V, R} <- Tests].

%% Values that must crash, including the signed forms that
%% binary_to_integer/1 would accept on its own.
parse_age_error_test_() ->
	Tests = [
		<<>>,
		<<"123, 123">>,
		<<"4.17">>,
		<<"+42">>,
		<<"-42">>,
		<<"-0">>
	],
	[{V, fun() -> {'EXIT', _} = (catch parse_age(V)) end} || V <- Tests].
-endif.

%% @doc Parse the Allow header.
%%
%% Returns the listed methods in order (token_list/2 is defined outside
%% this excerpt). An empty value is valid and yields [].

-spec parse_allow(binary()) -> [binary()].
parse_allow(Allow) ->
	token_list(Allow, []).

-ifdef(TEST).
%% Generator yielding {Methods, Binary}: the expected method list and
%% the header value, with optional whitespace around each comma.
allow() ->
	?LET(L,
		list({ows(), ows(), token()}),
		case L of
			[] -> {[], <<>>};
			_ ->
				%% Drop the first byte of the first element's separator.
				<< _, Allow/binary >> = iolist_to_binary([[OWS1, $,, OWS2, M] || {OWS1, OWS2, M} <- L]),
				{[M || {_, _, M} <- L], Allow}
		end).

%% Property: parsing returns the generated methods unchanged.
prop_parse_allow() ->
	?FORALL({L, Allow},
		allow(),
		L =:= parse_allow(Allow)).
%% Table-driven tests: each entry is {HeaderValue, ExpectedResult}.
parse_allow_test_() ->
	Tests = [
		{<<>>, []},
		{<<"GET, HEAD, PUT">>, [<<"GET">>, <<"HEAD">>, <<"PUT">>]}
	],
	[{V, fun() -> R = parse_allow(V) end} || {V, R} <- Tests].

%% Performance test run through the horse tool.
horse_parse_allow() ->
	horse:repeat(200000,
		parse_allow(<<"GET, HEAD, PUT">>)
	).
-endif.

%% @doc Parse the Authorization header.
%%
%% We support Basic, Digest and Bearer schemes only.
%%
%% The scheme name is matched case-insensitively, as auth-scheme is a
%% case-insensitive token: "basic ...", "BEARER ..." and "Digest ..."
%% are all accepted. Exactly one space must follow the scheme.
%%
%% In the Digest case we do not validate that the mandatory
%% fields are present. When parsing auth-params, we do not
%% accept BWS characters around the "=".
%%
%% Any other scheme, or malformed credentials, crashes.

-spec parse_authorization(binary())
	-> {basic, binary(), binary()}
	| {bearer, binary()}
	| {digest, [{binary(), binary()}]}.
%% "Basic": the credentials are base64 of "userid:password".
parse_authorization(<< B, A, S, I, C, " ", R/bits >>)
		when ((B =:= $B) or (B =:= $b)), ((A =:= $A) or (A =:= $a)),
			((S =:= $S) or (S =:= $s)), ((I =:= $I) or (I =:= $i)),
			((C =:= $C) or (C =:= $c)) ->
	auth_basic(base64:decode(R), <<>>);
%% "Bearer": the token must not be empty; it is validated, then
%% returned unchanged.
parse_authorization(<< B, E1, A, R1, E2, R2, " ", R/bits >>)
		when (R =/= <<>>), ((B =:= $B) or (B =:= $b)),
			((E1 =:= $E) or (E1 =:= $e)), ((A =:= $A) or (A =:= $a)),
			((R1 =:= $R) or (R1 =:= $r)), ((E2 =:= $E) or (E2 =:= $e)),
			((R2 =:= $R) or (R2 =:= $r)) ->
	validate_auth_bearer(R),
	{bearer, R};
%% "Digest": a non-empty list of auth-params.
parse_authorization(<< D, I, G, E, S, T, " ", R/bits >>)
		when ((D =:= $D) or (D =:= $d)), ((I =:= $I) or (I =:= $i)),
			((G =:= $G) or (G =:= $g)), ((E =:= $E) or (E =:= $e)),
			((S =:= $S) or (S =:= $s)), ((T =:= $T) or (T =:= $t)) ->
	{digest, nonempty(auth_digest_list(R, []))}.

%% Split the decoded Basic credentials at the first ":". Everything
%% before it is the user id, everything after it the password. A value
%% without ":" has no matching clause and crashes.
auth_basic(<< $:, Password/bits >>, UserID) -> {basic, UserID, Password};
auth_basic(<< C, R/bits >>, UserID) -> auth_basic(R, << UserID/binary, C >>).

%% Check the Bearer token: token68 characters, optionally followed by
%% "=" padding up to the end of the value.
validate_auth_bearer(<< C, R/bits >>) when ?IS_TOKEN68(C) -> validate_auth_bearer(R);
validate_auth_bearer(<< $=, R/bits >>) -> validate_auth_bearer_eq(R);
validate_auth_bearer(<<>>) -> ok.

%% Once padding has started, only more "=" may follow.
validate_auth_bearer_eq(<< $=, R/bits >>) -> validate_auth_bearer_eq(R);
validate_auth_bearer_eq(<<>>) -> ok.
%% Parse the Digest auth-params into {Key, Value} pairs, in header order.
%%
%% The well-known parameters are matched as literal lowercase prefixes,
%% each with the value form it requires (token, quoted string, or eight
%% LHEX characters for "nc"). Any other parameter name goes through the
%% generic auth_digest_param/3 path.
auth_digest_list(<<>>, Acc) -> lists:reverse(Acc);
auth_digest_list(<< C, R/bits >>, Acc) when ?IS_WS_COMMA(C) -> auth_digest_list(R, Acc);
auth_digest_list(<< "algorithm=", C, R/bits >>, Acc) when ?IS_TOKEN(C) -> auth_digest_token(R, Acc, <<"algorithm">>, << C >>);
auth_digest_list(<< "cnonce=\"", R/bits >>, Acc) -> auth_digest_quoted(R, Acc, <<"cnonce">>, <<>>);
auth_digest_list(<< "nc=", A, B, C, D, E, F, G, H, R/bits >>, Acc)
		when ?IS_LHEX(A), ?IS_LHEX(B), ?IS_LHEX(C), ?IS_LHEX(D),
			?IS_LHEX(E), ?IS_LHEX(F), ?IS_LHEX(G), ?IS_LHEX(H) ->
	auth_digest_list_sep(R, [{<<"nc">>, << A, B, C, D, E, F, G, H >>}|Acc]);
auth_digest_list(<< "nonce=\"", R/bits >>, Acc) -> auth_digest_quoted(R, Acc, <<"nonce">>, <<>>);
auth_digest_list(<< "opaque=\"", R/bits >>, Acc) -> auth_digest_quoted(R, Acc, <<"opaque">>, <<>>);
auth_digest_list(<< "qop=", C, R/bits >>, Acc) when ?IS_TOKEN(C) -> auth_digest_token(R, Acc, <<"qop">>, << C >>);
auth_digest_list(<< "realm=\"", R/bits >>, Acc) -> auth_digest_quoted(R, Acc, <<"realm">>, <<>>);
auth_digest_list(<< "response=\"", R/bits >>, Acc) -> auth_digest_quoted(R, Acc, <<"response">>, <<>>);
auth_digest_list(<< "uri=\"", R/bits >>, Acc) -> auth_digest_quoted(R, Acc, <<"uri">>, <<>>);
auth_digest_list(<< "username=\"", R/bits >>, Acc) -> auth_digest_quoted(R, Acc, <<"username">>, <<>>);
auth_digest_list(<< C, R/bits >>, Acc) when ?IS_TOKEN(C) ->
	?LOWER(auth_digest_param, R, Acc, <<>>).

%% Generic parameter: accumulate the name K until "=", then dispatch on
%% whether the value is a quoted string or a token.
auth_digest_param(<< $=, $", R/bits >>, Acc, K) -> auth_digest_quoted(R, Acc, K, <<>>);
auth_digest_param(<< $=, C, R/bits >>, Acc, K) when ?IS_TOKEN(C) -> auth_digest_token(R, Acc, K, << C >>);
auth_digest_param(<< C, R/bits >>, Acc, K) when ?IS_TOKEN(C) ->
	?LOWER(auth_digest_param, R, Acc, K).

%% Token value: accumulate token characters (case preserved), then
%% store the pair.
auth_digest_token(<< C, R/bits >>, Acc, K, V) when ?IS_TOKEN(C) -> auth_digest_token(R, Acc, K, << V/binary, C >>);
auth_digest_token(R, Acc, K, V) -> auth_digest_list_sep(R, [{K, V}|Acc]).
%% Quoted-string value: the closing quote ends it, a backslash escapes
%% the next character (the backslash is dropped), anything else is
%% copied as is.
auth_digest_quoted(<< $", R/bits >>, Acc, K, V) -> auth_digest_list_sep(R, [{K, V}|Acc]);
auth_digest_quoted(<< $\\, C, R/bits >>, Acc, K, V) when ?IS_VCHAR_OBS(C) -> auth_digest_quoted(R, Acc, K, << V/binary, C >>);
auth_digest_quoted(<< C, R/bits >>, Acc, K, V) when ?IS_VCHAR_OBS(C) -> auth_digest_quoted(R, Acc, K, << V/binary, C >>).

%% After a parameter: only whitespace, "," or end of input may follow.
auth_digest_list_sep(<<>>, Acc) -> lists:reverse(Acc);
auth_digest_list_sep(<< $,, R/bits >>, Acc) -> auth_digest_list(R, Acc);
auth_digest_list_sep(<< C, R/bits >>, Acc) when ?IS_WS(C) -> auth_digest_list_sep(R, Acc).

-ifdef(TEST).
%% Table-driven tests, one per supported scheme: each entry is
%% {HeaderValue, ExpectedResult}.
parse_authorization_test_() ->
	Tests = [
		{<<"Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ==">>, {basic, <<"Aladdin">>, <<"open sesame">>}},
		{<<"Bearer mF_9.B5f-4.1JqM">>, {bearer, <<"mF_9.B5f-4.1JqM">>}},
		{<<"Digest username=\"Mufasa\","
				"realm=\"[email protected]\","
				"nonce=\"dcd98b7102dd2f0e8b11d0f600bfb0c093\","
				"uri=\"/dir/index.html\","
				"qop=auth,"
				"nc=00000001,"
				"cnonce=\"0a4f113b\","
				"response=\"6629fae49393a05397450978507c4ef1\","
				"opaque=\"5ccc069c403ebaf9f0171e9517f40e41\"">>,
			{digest, [
				{<<"username">>, <<"Mufasa">>},
				{<<"realm">>, <<"[email protected]">>},
				{<<"nonce">>, <<"dcd98b7102dd2f0e8b11d0f600bfb0c093">>},
				{<<"uri">>, <<"/dir/index.html">>},
				{<<"qop">>, <<"auth">>},
				{<<"nc">>, <<"00000001">>},
				{<<"cnonce">>, <<"0a4f113b">>},
				{<<"response">>, <<"6629fae49393a05397450978507c4ef1">>},
				{<<"opaque">>, <<"5ccc069c403ebaf9f0171e9517f40e41">>}]}}
	],
	[{V, fun() -> R = parse_authorization(V) end} || {V, R} <- Tests].

%% Performance tests run through the horse tool, one per scheme.
horse_parse_authorization_basic() ->
	horse:repeat(20000,
		parse_authorization(<<"Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ==">>)
	).

horse_parse_authorization_bearer() ->
	horse:repeat(20000,
		parse_authorization(<<"Bearer mF_9.B5f-4.1JqM">>)
	).
%% Performance test run through the horse tool, Digest scheme.
horse_parse_authorization_digest() ->
	horse:repeat(20000,
		parse_authorization(
			<<"Digest username=\"Mufasa\","
				"realm=\"[email protected]\","
				"nonce=\"dcd98b7102dd2f0e8b11d0f600bfb0c093\","
				"uri=\"/dir/index.html\","
				"qop=auth,"
				"nc=00000001,"
				"cnonce=\"0a4f113b\","
				"response=\"6629fae49393a05397450978507c4ef1\","
				"opaque=\"5ccc069c403ebaf9f0171e9517f40e41\"">>)
	).
-endif.

%% @doc Parse the Cache-Control header.
%%
%% In the fields list case, we do not support escaping, which shouldn't be needed anyway.
%%
%% Each directive is returned, in header order, as one of:
%% the bare lowercased name; {Name, Binary} for a token or quoted-string
%% argument; {Name, Integer} for the delta-seconds directives; or
%% {Name, [FieldName]} for the quoted field lists of no-cache/private.

-spec parse_cache_control(binary())
	-> [binary() | {binary(), binary()} | {binary(), non_neg_integer()} | {binary(), [binary()]}].
%% The two exact values below are matched directly.
parse_cache_control(<<"no-cache">>) ->
	[<<"no-cache">>];
parse_cache_control(<<"max-age=0">>) ->
	[{<<"max-age">>, 0}];
parse_cache_control(CacheControl) ->
	nonempty(cache_directive_list(CacheControl, [])).

%% Start of a directive: skip whitespace and commas, or begin a name.
cache_directive_list(<<>>, Acc) -> lists:reverse(Acc);
cache_directive_list(<< C, R/bits >>, Acc) when ?IS_WS_COMMA(C)-> cache_directive_list(R, Acc);
cache_directive_list(<< C, R/bits >>, Acc) when ?IS_TOKEN(C) ->
	?LOWER(cache_directive, R, Acc, <<>>).

%% Accumulate the directive name T, then dispatch on the argument form.
%% The first two clauses must stay ahead of the generic "=" clauses:
%% they give no-cache/private a field list and the four delta-seconds
%% directives an integer value. A name not followed by "=" is stored
%% bare (last clause).
cache_directive(<< $=, $", R/bits >>, Acc, T)
		when (T =:= <<"no-cache">>) or (T =:= <<"private">>) ->
	cache_directive_fields_list(R, Acc, T, []);
cache_directive(<< $=, C, R/bits >>, Acc, T)
		when ?IS_DIGIT(C), (T =:= <<"max-age">>) or (T =:= <<"max-stale">>)
			or (T =:= <<"min-fresh">>) or (T =:= <<"s-maxage">>) ->
	cache_directive_delta(R, Acc, T, (C - $0));
cache_directive(<< $=, $", R/bits >>, Acc, T) -> cache_directive_quoted_string(R, Acc, T, <<>>);
cache_directive(<< $=, C, R/bits >>, Acc, T) when ?IS_TOKEN(C) -> cache_directive_token(R, Acc, T, << C >>);
cache_directive(<< C, R/bits >>, Acc, T) when ?IS_TOKEN(C) ->
	?LOWER(cache_directive, R, Acc, T);
cache_directive(R, Acc, T) -> cache_directive_list_sep(R, [T|Acc]).

%% Accumulate the remaining digits of a numeric directive value. The first
%% digit has already been consumed by the caller and seeds Seconds.
cache_directive_delta(<< Digit, Rest/bits >>, Directives, Name, Seconds) when ?IS_DIGIT(Digit) ->
	cache_directive_delta(Rest, Directives, Name, Seconds * 10 + (Digit - $0));
cache_directive_delta(Rest, Directives, Name, Seconds) ->
	cache_directive_list_sep(Rest, [{Name, Seconds}|Directives]).

%% Inside a quoted field-name list: skip separators, start a new field on a
%% token character, or finish the directive on the closing quote.
cache_directive_fields_list(<< $", Rest/bits >>, Directives, Name, Fields) ->
	cache_directive_list_sep(Rest, [{Name, lists:reverse(Fields)}|Directives]);
cache_directive_fields_list(<< Sep, Rest/bits >>, Directives, Name, Fields) when ?IS_WS_COMMA(Sep) ->
	cache_directive_fields_list(Rest, Directives, Name, Fields);
%% The character must stay bound to C: ?LOWER reads it implicitly.
cache_directive_fields_list(<< C, Rest/bits >>, Directives, Name, Fields) when ?IS_TOKEN(C) ->
	?LOWER(cache_directive_field, Rest, Directives, Name, Fields, <<>>).

%% Read one field name, lowercasing it as it is accumulated.
cache_directive_field(<< C, Rest/bits >>, Directives, Name, Fields, Field) when ?IS_TOKEN(C) ->
	?LOWER(cache_directive_field, Rest, Directives, Name, Fields, Field);
cache_directive_field(Rest, Directives, Name, Fields, Field) ->
	cache_directive_fields_list_sep(Rest, Directives, Name, [Field|Fields]).

%% After a field name: optional whitespace, then either a comma (more fields)
%% or the closing quote (end of the directive).
cache_directive_fields_list_sep(<< $,, Rest/bits >>, Directives, Name, Fields) ->
	cache_directive_fields_list(Rest, Directives, Name, Fields);
cache_directive_fields_list_sep(<< $", Rest/bits >>, Directives, Name, Fields) ->
	cache_directive_list_sep(Rest, [{Name, lists:reverse(Fields)}|Directives]);
cache_directive_fields_list_sep(<< Ws, Rest/bits >>, Directives, Name, Fields) when ?IS_WS(Ws) ->
	cache_directive_fields_list_sep(Rest, Directives, Name, Fields).

%% Token value of a directive; the case of the value is preserved.
cache_directive_token(<< Char, Rest/bits >>, Directives, Name, Value) when ?IS_TOKEN(Char) ->
	cache_directive_token(Rest, Directives, Name, << Value/binary, Char >>);
cache_directive_token(Rest, Directives, Name, Value) ->
	cache_directive_list_sep(Rest, [{Name, Value}|Directives]).

%% Quoted-string value of a directive. A backslash escape contributes the
%% escaped character itself; the closing quote ends the value.
cache_directive_quoted_string(<< $", Rest/bits >>, Directives, Name, Value) ->
	cache_directive_list_sep(Rest, [{Name, Value}|Directives]);
cache_directive_quoted_string(<< $\\, Escaped, Rest/bits >>, Directives, Name, Value)
		when ?IS_VCHAR_OBS(Escaped) ->
	cache_directive_quoted_string(Rest, Directives, Name, << Value/binary, Escaped >>);
cache_directive_quoted_string(<< Char, Rest/bits >>, Directives, Name, Value)
		when ?IS_VCHAR_OBS(Char) ->
	cache_directive_quoted_string(Rest, Directives, Name, << Value/binary, Char >>).
+ +cache_directive_list_sep(<<>>, Acc) -> lists:reverse(Acc); +cache_directive_list_sep(<< C, R/bits >>, Acc) when ?IS_WS(C) -> cache_directive_list_sep(R, Acc); +cache_directive_list_sep(<< $,, R/bits >>, Acc) -> cache_directive_list(R, Acc). + +-ifdef(TEST). +cache_directive_unreserved_token() -> + ?SUCHTHAT(T, + token(), + T =/= <<"max-age">> andalso T =/= <<"max-stale">> andalso T =/= <<"min-fresh">> + andalso T =/= <<"s-maxage">> andalso T =/= <<"no-cache">> andalso T =/= <<"private">>). + +cache_directive() -> + oneof([ + token(), + {cache_directive_unreserved_token(), token()}, + {cache_directive_unreserved_token(), quoted_string()}, + {elements([<<"max-age">>, <<"max-stale">>, <<"min-fresh">>, <<"s-maxage">>]), non_neg_integer()}, + {fields, elements([<<"no-cache">>, <<"private">>]), small_list(token())} + ]). + +cache_control() -> + ?LET(L, + non_empty(list(cache_directive())), + begin + << _, CacheControl/binary >> = iolist_to_binary([[$,, + case C of + {fields, K, V} -> [K, $=, $", [[F, $,] || F <- V], $"]; + {K, V} when is_integer(V) -> [K, $=, integer_to_binary(V)]; + {K, V} -> [K, $=, V]; + K -> K + end] || C <- L]), + {L, CacheControl} + end). + +prop_parse_cache_control() -> + ?FORALL({L, CacheControl}, + cache_control(), + begin + ResL = parse_cache_control(CacheControl), + CheckedL = [begin + ExpectedCc = case Cc of + {fields, K, V} -> {?LOWER(K), [?LOWER(F) || F <- V]}; + {K, V} -> {?LOWER(K), unquote(V)}; + K -> ?LOWER(K) + end, + ExpectedCc =:= ResCc + end || {Cc, ResCc} <- lists:zip(L, ResL)], + [true] =:= lists:usort(CheckedL) + end). 
+ +parse_cache_control_test_() -> + Tests = [ + {<<"no-cache">>, [<<"no-cache">>]}, + {<<"no-store">>, [<<"no-store">>]}, + {<<"max-age=0">>, [{<<"max-age">>, 0}]}, + {<<"max-age=30">>, [{<<"max-age">>, 30}]}, + {<<"private, community=\"UCI\"">>, [<<"private">>, {<<"community">>, <<"UCI">>}]}, + {<<"private=\"Content-Type, Content-Encoding, Content-Language\"">>, + [{<<"private">>, [<<"content-type">>, <<"content-encoding">>, <<"content-language">>]}]} + ], + [{V, fun() -> R = parse_cache_control(V) end} || {V, R} <- Tests]. + +parse_cache_control_error_test_() -> + Tests = [ + <<>> + ], + [{V, fun() -> {'EXIT', _} = (catch parse_cache_control(V)) end} || V <- Tests]. + +horse_parse_cache_control_no_cache() -> + horse:repeat(200000, + parse_cache_control(<<"no-cache">>) + ). + +horse_parse_cache_control_max_age_0() -> + horse:repeat(200000, + parse_cache_control(<<"max-age=0">>) + ). + +horse_parse_cache_control_max_age_30() -> + horse:repeat(200000, + parse_cache_control(<<"max-age=30">>) + ). + +horse_parse_cache_control_custom() -> + horse:repeat(200000, + parse_cache_control(<<"private, community=\"UCI\"">>) + ). + +horse_parse_cache_control_fields() -> + horse:repeat(200000, + parse_cache_control(<<"private=\"Content-Type, Content-Encoding, Content-Language\"">>) + ). +-endif. + +%% @doc Parse the Connection header. + +-spec parse_connection(binary()) -> [binary()]. +parse_connection(<<"close">>) -> + [<<"close">>]; +parse_connection(<<"keep-alive">>) -> + [<<"keep-alive">>]; +parse_connection(Connection) -> + nonempty(token_ci_list(Connection, [])). + +-ifdef(TEST). +prop_parse_connection() -> + ?FORALL(L, + non_empty(list(token())), + begin + << _, Connection/binary >> = iolist_to_binary([[$,, C] || C <- L]), + ResL = parse_connection(Connection), + CheckedL = [?LOWER(Co) =:= ResC || {Co, ResC} <- lists:zip(L, ResL)], + [true] =:= lists:usort(CheckedL) + end). 
+ +parse_connection_test_() -> + Tests = [ + {<<"close">>, [<<"close">>]}, + {<<"ClOsE">>, [<<"close">>]}, + {<<"Keep-Alive">>, [<<"keep-alive">>]}, + {<<"keep-alive, Upgrade">>, [<<"keep-alive">>, <<"upgrade">>]} + ], + [{V, fun() -> R = parse_connection(V) end} || {V, R} <- Tests]. + +parse_connection_error_test_() -> + Tests = [ + <<>> + ], + [{V, fun() -> {'EXIT', _} = (catch parse_connection(V)) end} || V <- Tests]. + +horse_parse_connection_close() -> + horse:repeat(200000, + parse_connection(<<"close">>) + ). + +horse_parse_connection_keepalive() -> + horse:repeat(200000, + parse_connection(<<"keep-alive">>) + ). + +horse_parse_connection_keepalive_upgrade() -> + horse:repeat(200000, + parse_connection(<<"keep-alive, upgrade">>) + ). +-endif. + +%% @doc Parse the Content-Encoding header. + +-spec parse_content_encoding(binary()) -> [binary()]. +parse_content_encoding(ContentEncoding) -> + nonempty(token_ci_list(ContentEncoding, [])). + +-ifdef(TEST). +parse_content_encoding_test_() -> + Tests = [ + {<<"gzip">>, [<<"gzip">>]} + ], + [{V, fun() -> R = parse_content_encoding(V) end} || {V, R} <- Tests]. + +parse_content_encoding_error_test_() -> + Tests = [ + <<>> + ], + [{V, fun() -> {'EXIT', _} = (catch parse_content_encoding(V)) end} || V <- Tests]. + +horse_parse_content_encoding() -> + horse:repeat(200000, + parse_content_encoding(<<"gzip">>) + ). +-endif. + +%% @doc Parse the Content-Language header. +%% +%% We do not support irregular deprecated tags that do not match the ABNF. + +-spec parse_content_language(binary()) -> [binary()]. +parse_content_language(ContentLanguage) -> + nonempty(langtag_list(ContentLanguage, [])). 
+ +langtag_list(<<>>, Acc) -> lists:reverse(Acc); +langtag_list(<< C, R/bits >>, Acc) when ?IS_WS_COMMA(C) -> langtag_list(R, Acc); +langtag_list(<< A, B, C, R/bits >>, Acc) when ?IS_ALPHA(A), ?IS_ALPHA(B), ?IS_ALPHA(C) -> + langtag_extlang(R, Acc, << ?LC(A), ?LC(B), ?LC(C) >>, 0); +langtag_list(<< A, B, R/bits >>, Acc) when ?IS_ALPHA(A), ?IS_ALPHA(B) -> + langtag_extlang(R, Acc, << ?LC(A), ?LC(B) >>, 0); +langtag_list(<< X, R/bits >>, Acc) when X =:= $x; X =:= $X -> langtag_privateuse_sub(R, Acc, << $x >>, 0). + +langtag_extlang(<< $-, A, B, C, D, E, F, G, H, R/bits >>, Acc, T, _) + when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), + ?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G), ?IS_ALPHANUM(H) -> + langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G), ?LC(H) >>); +langtag_extlang(<< $-, A, B, C, D, E, F, G, R/bits >>, Acc, T, _) + when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), + ?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G) -> + langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G) >>); +langtag_extlang(<< $-, A, B, C, D, E, F, R/bits >>, Acc, T, _) + when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), + ?IS_ALPHANUM(E), ?IS_ALPHANUM(F) -> + langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F) >>); +langtag_extlang(<< $-, A, B, C, D, E, R/bits >>, Acc, T, _) + when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), ?IS_ALPHANUM(E) -> + langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E) >>); +langtag_extlang(<< $-, A, B, C, D, R/bits >>, Acc, T, _) + when ?IS_ALPHA(A), ?IS_ALPHA(B), ?IS_ALPHA(C), ?IS_ALPHA(D) -> + langtag_region(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D) >>); +langtag_extlang(<< $-, A, B, C, R/bits >>, Acc, T, N) + when ?IS_ALPHA(A), ?IS_ALPHA(B), ?IS_ALPHA(C) -> + case N of + 2 -> 
langtag_script(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C) >>); + _ -> langtag_extlang(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C) >>, N + 1) + end; +langtag_extlang(R, Acc, T, _) -> langtag_region(R, Acc, T). + +langtag_script(<< $-, A, B, C, D, E, F, G, H, R/bits >>, Acc, T) + when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), + ?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G), ?IS_ALPHANUM(H) -> + langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G), ?LC(H) >>); +langtag_script(<< $-, A, B, C, D, E, F, G, R/bits >>, Acc, T) + when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), + ?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G) -> + langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G) >>); +langtag_script(<< $-, A, B, C, D, E, F, R/bits >>, Acc, T) + when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), + ?IS_ALPHANUM(E), ?IS_ALPHANUM(F) -> + langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F) >>); +langtag_script(<< $-, A, B, C, D, E, R/bits >>, Acc, T) + when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), ?IS_ALPHANUM(E) -> + langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E) >>); +langtag_script(<< $-, A, B, C, D, R/bits >>, Acc, T) + when ?IS_ALPHA(A), ?IS_ALPHA(B), ?IS_ALPHA(C), ?IS_ALPHA(D) -> + langtag_region(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D) >>); +langtag_script(R, Acc, T) -> + langtag_region(R, Acc, T). 
+ +langtag_region(<< $-, A, B, C, D, E, F, G, H, R/bits >>, Acc, T) + when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), + ?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G), ?IS_ALPHANUM(H) -> + langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G), ?LC(H) >>); +langtag_region(<< $-, A, B, C, D, E, F, G, R/bits >>, Acc, T) + when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), + ?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G) -> + langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G) >>); +langtag_region(<< $-, A, B, C, D, E, F, R/bits >>, Acc, T) + when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), + ?IS_ALPHANUM(E), ?IS_ALPHANUM(F) -> + langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F) >>); +langtag_region(<< $-, A, B, C, D, E, R/bits >>, Acc, T) + when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), ?IS_ALPHANUM(E) -> + langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E) >>); +langtag_region(<< $-, A, B, C, D, R/bits >>, Acc, T) + when ?IS_DIGIT(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D) -> + langtag_variant(R, Acc, << T/binary, $-, A, ?LC(B), ?LC(C), ?LC(D) >>); +langtag_region(<< $-, A, B, R/bits >>, Acc, T) when ?IS_ALPHA(A), ?IS_ALPHA(B) -> + langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B) >>); +langtag_region(<< $-, A, B, C, R/bits >>, Acc, T) when ?IS_DIGIT(A), ?IS_DIGIT(B), ?IS_DIGIT(C) -> + langtag_variant(R, Acc, << T/binary, $-, A, B, C >>); +langtag_region(R, Acc, T) -> + langtag_variant(R, Acc, T). 
+ +langtag_variant(<< $-, A, B, C, D, E, F, G, H, R/bits >>, Acc, T) + when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), + ?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G), ?IS_ALPHANUM(H) -> + langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G), ?LC(H) >>); +langtag_variant(<< $-, A, B, C, D, E, F, G, R/bits >>, Acc, T) + when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), + ?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G) -> + langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G) >>); +langtag_variant(<< $-, A, B, C, D, E, F, R/bits >>, Acc, T) + when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), + ?IS_ALPHANUM(E), ?IS_ALPHANUM(F) -> + langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F) >>); +langtag_variant(<< $-, A, B, C, D, E, R/bits >>, Acc, T) + when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), ?IS_ALPHANUM(E) -> + langtag_variant(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E) >>); +langtag_variant(<< $-, A, B, C, D, R/bits >>, Acc, T) + when ?IS_DIGIT(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D) -> + langtag_variant(R, Acc, << T/binary, $-, A, ?LC(B), ?LC(C), ?LC(D) >>); +langtag_variant(R, Acc, T) -> + langtag_extension(R, Acc, T). + +langtag_extension(<< $-, X, R/bits >>, Acc, T) when X =:= $x; X =:= $X -> langtag_privateuse_sub(R, Acc, << T/binary, $-, $x >>, 0); +langtag_extension(<< $-, S, R/bits >>, Acc, T) when ?IS_ALPHANUM(S) -> langtag_extension_sub(R, Acc, << T/binary, $-, ?LC(S) >>, 0); +langtag_extension(R, Acc, T) -> langtag_list_sep(R, [T|Acc]). 
+ +langtag_extension_sub(<< $-, A, B, C, D, E, F, G, H, R/bits >>, Acc, T, N) + when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), + ?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G), ?IS_ALPHANUM(H) -> + langtag_extension_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G), ?LC(H) >>, N + 1); +langtag_extension_sub(<< $-, A, B, C, D, E, F, G, R/bits >>, Acc, T, N) + when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), + ?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G) -> + langtag_extension_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G) >>, N + 1); +langtag_extension_sub(<< $-, A, B, C, D, E, F, R/bits >>, Acc, T, N) + when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), + ?IS_ALPHANUM(E), ?IS_ALPHANUM(F) -> + langtag_extension_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F) >>, N + 1); +langtag_extension_sub(<< $-, A, B, C, D, E, R/bits >>, Acc, T, N) + when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), ?IS_ALPHANUM(E) -> + langtag_extension_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E) >>, N + 1); +langtag_extension_sub(<< $-, A, B, C, D, R/bits >>, Acc, T, N) + when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D) -> + langtag_extension_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D) >>, N + 1); +langtag_extension_sub(<< $-, A, B, C, R/bits >>, Acc, T, N) + when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C) -> + langtag_extension_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C) >>, N + 1); +langtag_extension_sub(<< $-, A, B, R/bits >>, Acc, T, N) + when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B) -> + langtag_extension_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B) >>, N + 1); +langtag_extension_sub(R, Acc, T, N) when N > 0 -> + langtag_extension(R, Acc, T). 
+ +langtag_privateuse_sub(<< $-, A, B, C, D, E, F, G, H, R/bits >>, Acc, T, N) + when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), + ?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G), ?IS_ALPHANUM(H) -> + langtag_privateuse_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G), ?LC(H) >>, N + 1); +langtag_privateuse_sub(<< $-, A, B, C, D, E, F, G, R/bits >>, Acc, T, N) + when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), + ?IS_ALPHANUM(E), ?IS_ALPHANUM(F), ?IS_ALPHANUM(G) -> + langtag_privateuse_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F), ?LC(G) >>, N + 1); +langtag_privateuse_sub(<< $-, A, B, C, D, E, F, R/bits >>, Acc, T, N) + when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), + ?IS_ALPHANUM(E), ?IS_ALPHANUM(F) -> + langtag_privateuse_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E), ?LC(F) >>, N + 1); +langtag_privateuse_sub(<< $-, A, B, C, D, E, R/bits >>, Acc, T, N) + when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D), ?IS_ALPHANUM(E) -> + langtag_privateuse_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D), ?LC(E) >>, N + 1); +langtag_privateuse_sub(<< $-, A, B, C, D, R/bits >>, Acc, T, N) + when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C), ?IS_ALPHANUM(D) -> + langtag_privateuse_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C), ?LC(D) >>, N + 1); +langtag_privateuse_sub(<< $-, A, B, C, R/bits >>, Acc, T, N) + when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B), ?IS_ALPHANUM(C) -> + langtag_privateuse_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B), ?LC(C) >>, N + 1); +langtag_privateuse_sub(<< $-, A, B, R/bits >>, Acc, T, N) + when ?IS_ALPHANUM(A), ?IS_ALPHANUM(B) -> + langtag_privateuse_sub(R, Acc, << T/binary, $-, ?LC(A), ?LC(B) >>, N + 1); +langtag_privateuse_sub(<< $-, A, R/bits >>, Acc, T, N) + when ?IS_ALPHANUM(A) -> + langtag_privateuse_sub(R, Acc, << T/binary, $-, ?LC(A) >>, N + 1); 
+langtag_privateuse_sub(R, Acc, T, N) when N > 0 -> langtag_list_sep(R, [T|Acc]). + +langtag_list_sep(<<>>, Acc) -> lists:reverse(Acc); +langtag_list_sep(<< $,, R/bits >>, Acc) -> langtag_list(R, Acc); +langtag_list_sep(<< C, R/bits >>, Acc) when ?IS_WS(C) -> langtag_list_sep(R, Acc). + +-ifdef(TEST). +langtag_language() -> vector(2, 3, alpha()). +langtag_extlang() -> vector(0, 3, [$-, alpha(), alpha(), alpha()]). +langtag_script() -> oneof([[], [$-, alpha(), alpha(), alpha(), alpha()]]). +langtag_region() -> oneof([[], [$-, alpha(), alpha()], [$-, digit(), digit(), digit()]]). + +langtag_variant() -> + small_list(frequency([ + {4, [$-, vector(5, 8, alphanum())]}, + {1, [$-, digit(), alphanum(), alphanum(), alphanum()]} + ])). + +langtag_extension() -> + small_list([$-, ?SUCHTHAT(S, alphanum(), S =/= $x andalso S =/= $X), + small_non_empty_list([$-, vector(2, 8, alphanum())]) + ]). + +langtag_privateuse() -> oneof([[], [$-, langtag_privateuse_nodash()]]). +langtag_privateuse_nodash() -> [elements([$x, $X]), small_non_empty_list([$-, vector(1, 8, alphanum())])]. +private_language_tag() -> ?LET(T, langtag_privateuse_nodash(), iolist_to_binary(T)). + +language_tag() -> + ?LET(IoList, + [langtag_language(), langtag_extlang(), langtag_script(), langtag_region(), + langtag_variant(), langtag_extension(), langtag_privateuse()], + iolist_to_binary(IoList)). + +content_language() -> + ?LET(L, + non_empty(list(frequency([ + {90, language_tag()}, + {10, private_language_tag()} + ]))), + begin + << _, ContentLanguage/binary >> = iolist_to_binary([[$,, T] || T <- L]), + {L, ContentLanguage} + end). + +prop_parse_content_language() -> + ?FORALL({L, ContentLanguage}, + content_language(), + begin + ResL = parse_content_language(ContentLanguage), + CheckedL = [?LOWER(T) =:= ResT || {T, ResT} <- lists:zip(L, ResL)], + [true] =:= lists:usort(CheckedL) + end). 

parse_content_language_test_() ->
	Tests = [
		{<<"de">>, [<<"de">>]},
		{<<"fr">>, [<<"fr">>]},
		{<<"ja">>, [<<"ja">>]},
		{<<"zh-Hant">>, [<<"zh-hant">>]},
		{<<"zh-Hans">>, [<<"zh-hans">>]},
		{<<"sr-Cyrl">>, [<<"sr-cyrl">>]},
		{<<"sr-Latn">>, [<<"sr-latn">>]},
		{<<"zh-cmn-Hans-CN">>, [<<"zh-cmn-hans-cn">>]},
		{<<"cmn-Hans-CN">>, [<<"cmn-hans-cn">>]},
		{<<"zh-yue-HK">>, [<<"zh-yue-hk">>]},
		{<<"yue-HK">>, [<<"yue-hk">>]},
		{<<"zh-Hans-CN">>, [<<"zh-hans-cn">>]},
		{<<"sr-Latn-RS">>, [<<"sr-latn-rs">>]},
		{<<"sl-rozaj">>, [<<"sl-rozaj">>]},
		{<<"sl-rozaj-biske">>, [<<"sl-rozaj-biske">>]},
		{<<"sl-nedis">>, [<<"sl-nedis">>]},
		{<<"de-CH-1901">>, [<<"de-ch-1901">>]},
		{<<"sl-IT-nedis">>, [<<"sl-it-nedis">>]},
		{<<"hy-Latn-IT-arevela">>, [<<"hy-latn-it-arevela">>]},
		{<<"de-DE">>, [<<"de-de">>]},
		{<<"en-US">>, [<<"en-us">>]},
		{<<"es-419">>, [<<"es-419">>]},
		{<<"de-CH-x-phonebk">>, [<<"de-ch-x-phonebk">>]},
		{<<"az-Arab-x-AZE-derbend">>, [<<"az-arab-x-aze-derbend">>]},
		{<<"x-whatever">>, [<<"x-whatever">>]},
		{<<"qaa-Qaaa-QM-x-southern">>, [<<"qaa-qaaa-qm-x-southern">>]},
		{<<"de-Qaaa">>, [<<"de-qaaa">>]},
		{<<"sr-Latn-QM">>, [<<"sr-latn-qm">>]},
		{<<"sr-Qaaa-RS">>, [<<"sr-qaaa-rs">>]},
		{<<"en-US-u-islamcal">>, [<<"en-us-u-islamcal">>]},
		{<<"zh-CN-a-myext-x-private">>, [<<"zh-cn-a-myext-x-private">>]},
		{<<"en-a-myext-b-another">>, [<<"en-a-myext-b-another">>]},
		{<<"mn-Cyrl-MN">>, [<<"mn-cyrl-mn">>]},
		{<<"MN-cYRL-mn">>, [<<"mn-cyrl-mn">>]},
		{<<"mN-cYrL-Mn">>, [<<"mn-cyrl-mn">>]},
		{<<"az-Arab-IR">>, [<<"az-arab-ir">>]},
		{<<"zh-gan">>, [<<"zh-gan">>]},
		{<<"zh-yue">>, [<<"zh-yue">>]},
		{<<"zh-cmn">>, [<<"zh-cmn">>]},
		{<<"de-AT">>, [<<"de-at">>]},
		{<<"de-CH-1996">>, [<<"de-ch-1996">>]},
		{<<"en-Latn-GB-boont-r-extended-sequence-x-private">>,
			[<<"en-latn-gb-boont-r-extended-sequence-x-private">>]},
		{<<"el-x-koine">>, [<<"el-x-koine">>]},
		{<<"el-x-attic">>, [<<"el-x-attic">>]},
		{<<"fr, en-US, es-419, az-Arab, x-pig-latin, man-Nkoo-GN">>,
			[<<"fr">>, <<"en-us">>, <<"es-419">>, <<"az-arab">>, <<"x-pig-latin">>, <<"man-nkoo-gn">>]},
		{<<"da">>, [<<"da">>]},
		{<<"mi, en">>, [<<"mi">>, <<"en">>]}
	],
	[{V, fun() -> R = parse_content_language(V) end} || {V, R} <- Tests].

parse_content_language_error_test_() ->
	Tests = [
		<<>>
	],
	[{V, fun() -> {'EXIT', _} = (catch parse_content_language(V)) end} || V <- Tests].

horse_parse_content_language() ->
	horse:repeat(100000,
		parse_content_language(<<"fr, en-US, es-419, az-Arab, x-pig-latin, man-Nkoo-GN">>)
	).
-endif.

%% @doc Parse the Content-Length header.
%%
%% The value has at least one digit, and may be followed by whitespace.
%%
%% Only decimal digits are accepted: a sign prefix ("+42", "-0"), a
%% fraction or a list of values makes the call crash, like any other
%% invalid header value handled by this module.

-spec parse_content_length(binary()) -> non_neg_integer().
parse_content_length(<< C, R/bits >>) when ?IS_DIGIT(C) ->
	content_length_digits(R, C - $0).

%% Accumulate the remaining digits; the value ends at the end of the
%% input or at the first whitespace character.
content_length_digits(<<>>, Len) -> Len;
content_length_digits(<< C, R/bits >>, Len) when ?IS_DIGIT(C) ->
	content_length_digits(R, Len * 10 + (C - $0));
content_length_digits(<< C, R/bits >>, Len) when ?IS_WS(C) ->
	content_length_ws(R, Len).

%% Only whitespace may follow the digits; anything else is an error.
content_length_ws(<<>>, Len) -> Len;
content_length_ws(<< C, R/bits >>, Len) when ?IS_WS(C) ->
	content_length_ws(R, Len).

-ifdef(TEST).
prop_parse_content_length() ->
	?FORALL(
		X,
		non_neg_integer(),
		X =:= parse_content_length(integer_to_binary(X))
	).

parse_content_length_test_() ->
	Tests = [
		{<<"0">>, 0},
		{<<"42">>, 42},
		{<<"69">>, 69},
		{<<"1337">>, 1337},
		{<<"3495">>, 3495},
		{<<"1234567890">>, 1234567890},
		%% Trailing whitespace is allowed, as documented.
		{<<"42 ">>, 42},
		{<<"42 \t ">>, 42}
	],
	[{V, fun() -> R = parse_content_length(V) end} || {V, R} <- Tests].

parse_content_length_error_test_() ->
	Tests = [
		<<>>,
		<<"-1">>,
		<<"123, 123">>,
		<<"4.17">>,
		%% Signs and leading or embedded whitespace are not 1*DIGIT.
		<<"+42">>,
		<<"-0">>,
		<<" 42">>,
		<<"4 2">>
	],
	[{V, fun() -> {'EXIT', _} = (catch parse_content_length(V)) end} || V <- Tests].

horse_parse_content_length_zero() ->
	horse:repeat(100000,
		parse_content_length(<<"0">>)
	).

horse_parse_content_length_giga() ->
	horse:repeat(100000,
		parse_content_length(<<"1234567890">>)
	).
-endif.

%% @doc Parse the Content-Range header.

-spec parse_content_range(binary())
	-> {bytes, non_neg_integer(), non_neg_integer(), non_neg_integer() | '*'}
	| {bytes, '*', non_neg_integer()} | {binary(), binary()}.
%% Entry point: "bytes" ranges are parsed into integers, any other unit is
%% returned as a lowercased unit name plus the raw range response.
parse_content_range(<<"bytes ", Digit, Rest/bits >>) when ?IS_DIGIT(Digit) ->
	byte_range_first(Rest, Digit - $0);
parse_content_range(<<"bytes */", Digit, Rest/bits >>) when ?IS_DIGIT(Digit) ->
	unsatisfied_range(Rest, Digit - $0);
%% The character must stay bound to C: ?LOWER reads it implicitly.
parse_content_range(<< C, Rest/bits >>) when ?IS_TOKEN(C) ->
	?LOWER(other_content_range_unit, Rest, <<>>).

%% First byte position: digits up to the dash that introduces the last
%% byte position, which must start with a digit.
byte_range_first(<< Digit, Rest/bits >>, First) when ?IS_DIGIT(Digit) ->
	byte_range_first(Rest, (First * 10) + (Digit - $0));
byte_range_first(<< $-, Digit, Rest/bits >>, First) when ?IS_DIGIT(Digit) ->
	byte_range_last(Rest, First, Digit - $0).

%% Last byte position: digits up to the slash, followed by either "*"
%% (unknown complete length) or the first digit of the complete length.
byte_range_last(<< Digit, Rest/bits >>, First, Last) when ?IS_DIGIT(Digit) ->
	byte_range_last(Rest, First, (Last * 10) + (Digit - $0));
byte_range_last(<< $/, Digit, Rest/bits >>, First, Last) when ?IS_DIGIT(Digit) ->
	byte_range_complete(Rest, First, Last, Digit - $0);
byte_range_last(<<"/*">>, First, Last) ->
	{bytes, First, Last, '*'}.

%% Complete length: digits until the end of the value.
byte_range_complete(<< Digit, Rest/bits >>, First, Last, Complete) when ?IS_DIGIT(Digit) ->
	byte_range_complete(Rest, First, Last, (Complete * 10) + (Digit - $0));
byte_range_complete(<<>>, First, Last, Complete) ->
	{bytes, First, Last, Complete}.

%% "bytes */Complete": digits of the complete length until the end.
unsatisfied_range(<< Digit, Rest/bits >>, Complete) when ?IS_DIGIT(Digit) ->
	unsatisfied_range(Rest, (Complete * 10) + (Digit - $0));
unsatisfied_range(<<>>, Complete) ->
	{bytes, '*', Complete}.

%% Unit name of a non-bytes range, lowercased, terminated by a space.
other_content_range_unit(<< C, Rest/bits >>, Unit) when ?IS_TOKEN(C) ->
	?LOWER(other_content_range_unit, Rest, Unit);
other_content_range_unit(<< $\s, Rest/bits >>, Unit) ->
	other_content_range_resp(Rest, Unit, <<>>).

%% Range response of a non-bytes unit, copied verbatim until the end.
other_content_range_resp(<< Char, Rest/bits >>, Unit, Resp) when ?IS_CHAR(Char) ->
	other_content_range_resp(Rest, Unit, << Resp/binary, Char >>);
other_content_range_resp(<<>>, Unit, Resp) ->
	{Unit, Resp}.

-ifdef(TEST).
+content_range() -> + ?LET(ContentRange, + oneof([ + ?SUCHTHAT({bytes, First, Last, Complete}, + {bytes, non_neg_integer(), non_neg_integer(), non_neg_integer()}, + First =< Last andalso Last < Complete), + ?SUCHTHAT({bytes, First, Last, '*'}, + {bytes, non_neg_integer(), non_neg_integer(), '*'}, + First =< Last), + {bytes, '*', non_neg_integer()}, + {token(), ?LET(L, list(abnf_char()), list_to_binary(L))} + ]), + {case ContentRange of + {Unit, Resp} when is_binary(Unit) -> {?LOWER(Unit), Resp}; + _ -> ContentRange + end, case ContentRange of + {bytes, First, Last, '*'} -> + << "bytes ", (integer_to_binary(First))/binary, "-", + (integer_to_binary(Last))/binary, "/*">>; + {bytes, First, Last, Complete} -> + << "bytes ", (integer_to_binary(First))/binary, "-", + (integer_to_binary(Last))/binary, "/", (integer_to_binary(Complete))/binary >>; + {bytes, '*', Complete} -> + << "bytes */", (integer_to_binary(Complete))/binary >>; + {Unit, Resp} -> + << Unit/binary, $\s, Resp/binary >> + end}). + +prop_parse_content_range() -> + ?FORALL({Res, ContentRange}, + content_range(), + Res =:= parse_content_range(ContentRange)). + +parse_content_range_test_() -> + Tests = [ + {<<"bytes 21010-47021/47022">>, {bytes, 21010, 47021, 47022}}, + {<<"bytes 500-999/8000">>, {bytes, 500, 999, 8000}}, + {<<"bytes 7000-7999/8000">>, {bytes, 7000, 7999, 8000}}, + {<<"bytes 42-1233/1234">>, {bytes, 42, 1233, 1234}}, + {<<"bytes 42-1233/*">>, {bytes, 42, 1233, '*'}}, + {<<"bytes */1234">>, {bytes, '*', 1234}}, + {<<"bytes 0-499/1234">>, {bytes, 0, 499, 1234}}, + {<<"bytes 500-999/1234">>, {bytes, 500, 999, 1234}}, + {<<"bytes 500-1233/1234">>, {bytes, 500, 1233, 1234}}, + {<<"bytes 734-1233/1234">>, {bytes, 734, 1233, 1234}}, + {<<"bytes */47022">>, {bytes, '*', 47022}}, + {<<"exampleunit 1.2-4.3/25">>, {<<"exampleunit">>, <<"1.2-4.3/25">>}}, + {<<"exampleunit 11.2-14.3/25">>, {<<"exampleunit">>, <<"11.2-14.3/25">>}} + ], + [{V, fun() -> R = parse_content_range(V) end} || {V, R} <- Tests]. 

parse_content_range_error_test_() ->
	Tests = [
		<<>>
	],
	[{V, fun() -> {'EXIT', _} = (catch parse_content_range(V)) end} || V <- Tests].

horse_parse_content_range_bytes() ->
	horse:repeat(200000,
		parse_content_range(<<"bytes 21010-47021/47022">>)
	).

horse_parse_content_range_other() ->
	horse:repeat(200000,
		parse_content_range(<<"exampleunit 11.2-14.3/25">>)
	).
-endif.

%% @doc Parse the Content-Type header.
%%
%% The type, subtype and parameter names are lowercased. Parameter values
%% keep their case, except for the charset parameter whose value is
%% lowercased regardless of how the parameter name itself is spelled
%% ("charset", "Charset", "CHARSET", ...).

-spec parse_content_type(binary()) -> media_type().
parse_content_type(<< C, R/bits >>) when ?IS_TOKEN(C) ->
	?LOWER(media_type, R, <<>>).

%% Type name, up to the slash; the subtype must start with a token character.
media_type(<< $/, C, R/bits >>, T) when ?IS_TOKEN(C) ->
	?LOWER(media_subtype, R, T, <<>>);
media_type(<< C, R/bits >>, T) when ?IS_TOKEN(C) ->
	?LOWER(media_type, R, T).

media_subtype(<< C, R/bits >>, T, S) when ?IS_TOKEN(C) ->
	?LOWER(media_subtype, R, T, S);
media_subtype(R, T, S) -> media_param_sep(R, T, S, []).

%% Between parameters: end of input, a semicolon, or whitespace to skip.
media_param_sep(<<>>, T, S, P) -> {T, S, lists:reverse(P)};
media_param_sep(<< $;, R/bits >>, T, S, P) -> media_before_param(R, T, S, P);
media_param_sep(<< C, R/bits >>, T, S, P) when ?IS_WS(C) -> media_param_sep(R, T, S, P).

%% The two "charset=" clauses are a fast path for the common all-lowercase
%% spelling. Any other spelling of the name goes through media_param/5,
%% which dispatches on the lowercased name.
media_before_param(<< C, R/bits >>, T, S, P) when ?IS_WS(C)-> media_before_param(R, T, S, P);
media_before_param(<< "charset=", $", R/bits >>, T, S, P) -> media_charset_quoted(R, T, S, P, <<>>);
media_before_param(<< "charset=", R/bits >>, T, S, P) -> media_charset(R, T, S, P, <<>>);
media_before_param(<< C, R/bits >>, T, S, P) when ?IS_TOKEN(C) ->
	?LOWER(media_param, R, T, S, P, <<>>).

%% Quoted charset value, lowercased as it is read.
media_charset_quoted(<< $", R/bits >>, T, S, P, V) ->
	media_param_sep(R, T, S, [{<<"charset">>, V}|P]);
media_charset_quoted(<< $\\, C, R/bits >>, T, S, P, V) when ?IS_VCHAR_OBS(C) ->
	?LOWER(media_charset_quoted, R, T, S, P, V);
media_charset_quoted(<< C, R/bits >>, T, S, P, V) when ?IS_VCHAR_OBS(C) ->
	?LOWER(media_charset_quoted, R, T, S, P, V).

%% Token charset value, lowercased as it is read.
media_charset(<< C, R/bits >>, T, S, P, V) when ?IS_TOKEN(C) ->
	?LOWER(media_charset, R, T, S, P, V);
media_charset(R, T, S, P, V) -> media_param_sep(R, T, S, [{<<"charset">>, V}|P]).

%% Parameter name, lowercased as it is read. Once the "=" is reached the
%% name K is complete: a name equal to "charset" gets its value lowercased
%% just like on the fast path above, every other value is kept as-is.
media_param(<< $=, $", R/bits >>, T, S, P, <<"charset">>) ->
	media_charset_quoted(R, T, S, P, <<>>);
media_param(<< $=, C, R/bits >>, T, S, P, <<"charset">>) when ?IS_TOKEN(C) ->
	?LOWER(media_charset, R, T, S, P, <<>>);
media_param(<< $=, $", R/bits >>, T, S, P, K) -> media_quoted(R, T, S, P, K, <<>>);
media_param(<< $=, C, R/bits >>, T, S, P, K) when ?IS_TOKEN(C) -> media_value(R, T, S, P, K, << C >>);
media_param(<< C, R/bits >>, T, S, P, K) when ?IS_TOKEN(C) ->
	?LOWER(media_param, R, T, S, P, K).

%% Quoted parameter value; a backslash escape contributes the escaped
%% character itself.
media_quoted(<< $", R/bits >>, T, S, P, K, V) -> media_param_sep(R, T, S, [{K, V}|P]);
media_quoted(<< $\\, C, R/bits >>, T, S, P, K, V) when ?IS_VCHAR_OBS(C) -> media_quoted(R, T, S, P, K, << V/binary, C >>);
media_quoted(<< C, R/bits >>, T, S, P, K, V) when ?IS_VCHAR_OBS(C) -> media_quoted(R, T, S, P, K, << V/binary, C >>).

%% Token parameter value.
media_value(<< C, R/bits >>, T, S, P, K, V) when ?IS_TOKEN(C) -> media_value(R, T, S, P, K, << V/binary, C >>);
media_value(R, T, S, P, K, V) -> media_param_sep(R, T, S, [{K, V}|P]).

-ifdef(TEST).
%% The charset name is generated in several spellings so the property
%% below exercises both the fast path and the generic parameter path.
media_type_parameter() ->
	frequency([
		{90, parameter()},
		{10, {elements([<<"charset">>, <<"Charset">>, <<"CHARSET">>]),
			oneof([token(), quoted_string()]), <<>>, <<>>}}
	]).

media_type() ->
	?LET({T, S, P},
		{token(), token(), small_list(media_type_parameter())},
		{T, S, P, iolist_to_binary([T, $/, S, [[OWS1, $;, OWS2, K, $=, V] || {K, V, OWS1, OWS2} <- P]])}
	).

prop_parse_content_type() ->
	?FORALL({T, S, P, MediaType},
		media_type(),
		begin
			{ResT, ResS, ResP} = parse_content_type(MediaType),
			ExpectedP = [case ?LOWER(K) of
				<<"charset">> -> {<<"charset">>, ?LOWER(unquote(V))};
				LowK -> {LowK, unquote(V)}
			end || {K, V, _, _} <- P],
			ResT =:= ?LOWER(T)
				andalso ResS =:= ?LOWER(S)
				andalso ResP =:= ExpectedP
		end
	).
+ +parse_content_type_test_() -> + Tests = [ + {<<"text/html;charset=utf-8">>, + {<<"text">>, <<"html">>, [{<<"charset">>, <<"utf-8">>}]}}, + {<<"text/html;charset=UTF-8">>, + {<<"text">>, <<"html">>, [{<<"charset">>, <<"utf-8">>}]}}, + {<<"Text/HTML;Charset=\"utf-8\"">>, + {<<"text">>, <<"html">>, [{<<"charset">>, <<"utf-8">>}]}}, + {<<"text/html; charset=\"utf-8\"">>, + {<<"text">>, <<"html">>, [{<<"charset">>, <<"utf-8">>}]}}, + {<<"text/html; charset=ISO-8859-4">>, + {<<"text">>, <<"html">>, [{<<"charset">>, <<"iso-8859-4">>}]}}, + {<<"text/plain; charset=iso-8859-4">>, + {<<"text">>, <<"plain">>, [{<<"charset">>, <<"iso-8859-4">>}]}}, + {<<"multipart/form-data \t;Boundary=\"MultipartIsUgly\"">>, + {<<"multipart">>, <<"form-data">>, [ + {<<"boundary">>, <<"MultipartIsUgly">>} + ]}}, + {<<"foo/bar; one=FirstParam; two=SecondParam">>, + {<<"foo">>, <<"bar">>, [ + {<<"one">>, <<"FirstParam">>}, + {<<"two">>, <<"SecondParam">>} + ]}} + ], + [{V, fun() -> R = parse_content_type(V) end} || {V, R} <- Tests]. + +horse_parse_content_type() -> + horse:repeat(200000, + parse_content_type(<<"text/html;charset=utf-8">>) + ). +-endif. + +%% @doc Parse the Date header. + +-spec parse_date(binary()) -> calendar:datetime(). +parse_date(Date) -> + cow_date:parse_date(Date). + +-ifdef(TEST). +parse_date_test_() -> + Tests = [ + {<<"Tue, 15 Nov 1994 08:12:31 GMT">>, {{1994, 11, 15}, {8, 12, 31}}} + ], + [{V, fun() -> R = parse_date(V) end} || {V, R} <- Tests]. +-endif. + +%% @doc Parse the ETag header. + +-spec parse_etag(binary()) -> etag(). +parse_etag(<< $W, $/, $", R/bits >>) -> + etag(R, weak, <<>>); +parse_etag(<< $", R/bits >>) -> + etag(R, strong, <<>>). + +etag(<< $" >>, Strength, Tag) -> + {Strength, Tag}; +etag(<< C, R/bits >>, Strength, Tag) when ?IS_ETAGC(C) -> + etag(R, Strength, << Tag/binary, C >>). + +-ifdef(TEST). +etagc() -> + ?SUCHTHAT(C, int(16#21, 16#ff), C =/= 16#22 andalso C =/= 16#7f). 
%% Generator: {{Strength, TagBin}, SerializedETag}.
etag() ->
    ?LET({Strength, Tag},
        {elements([weak, strong]), list(etagc())},
        begin
            TagBin = list_to_binary(Tag),
            Serialized = case Strength of
                weak -> << $W, $/, $", TagBin/binary, $" >>;
                strong -> << $", TagBin/binary, $" >>
            end,
            {{Strength, TagBin}, Serialized}
        end).

%% Property: a serialized entity-tag parses back to the original term.
prop_parse_etag() ->
    ?FORALL({Tag, TagBin},
        etag(),
        Tag =:= parse_etag(TagBin)).

parse_etag_test_() ->
    Cases = [
        {<<"\"xyzzy\"">>, {strong, <<"xyzzy">>}},
        {<<"W/\"xyzzy\"">>, {weak, <<"xyzzy">>}},
        {<<"\"\"">>, {strong, <<>>}}
    ],
    [{Input, fun() -> Expected = parse_etag(Input) end}
        || {Input, Expected} <- Cases].

%% Malformed values must crash rather than return a result.
parse_etag_error_test_() ->
    Cases = [
        <<>>,
        <<"\"">>,
        <<"W">>,
        <<"W/">>
    ],
    [{Input, fun() -> {'EXIT', _} = (catch parse_etag(Input)) end}
        || Input <- Cases].

horse_parse_etag() ->
    horse:repeat(200000,
        parse_etag(<<"W/\"xyzzy\"">>)
    ).
-endif.

%% @doc Parse the Expect header.
%%
%% Only "100-continue" is recognized. The all-lowercase spelling is
%% matched first; the second clause accepts any mix of upper and
%% lower case letters for the word "continue".

-spec parse_expect(binary()) -> continue.
parse_expect(<<"100-continue">>) ->
    continue;
parse_expect(<<"100-", C, O, N, T, I, M, U, E >>)
        when (C =:= $C) or (C =:= $c), (O =:= $O) or (O =:= $o),
            (N =:= $N) or (N =:= $n), (T =:= $T) or (T =:= $t),
            (I =:= $I) or (I =:= $i), (M =:= $N) or (M =:= $n),
            (U =:= $U) or (U =:= $u), (E =:= $E) or (E =:= $e) ->
    continue.

-ifdef(TEST).
%% Generator: "100-continue" with each letter in a random case.
expect() ->
    ?LET(Chars,
        [$1, $0, $0, $-,
            elements([$c, $C]), elements([$o, $O]), elements([$n, $N]),
            elements([$t, $T]), elements([$i, $I]), elements([$n, $N]),
            elements([$u, $U]), elements([$e, $E])],
        list_to_binary(Chars)).

prop_parse_expect() ->
    ?FORALL(Expect, expect(), continue =:= parse_expect(Expect)).

parse_expect_test_() ->
    Cases = [
        <<"100-continue">>,
        <<"100-CONTINUE">>,
        <<"100-Continue">>,
        <<"100-CoNtInUe">>
    ],
    [{Input, fun() -> continue = parse_expect(Input) end} || Input <- Cases].

parse_expect_error_test_() ->
    Cases = [
        <<>>,
        <<" ">>,
        <<"200-OK">>,
        <<"Cookies">>
    ],
    [{Input, fun() -> {'EXIT', _} = (catch parse_expect(Input)) end}
        || Input <- Cases].
horse_parse_expect() ->
    horse:repeat(200000,
        parse_expect(<<"100-continue">>)
    ).
-endif.

%% @doc Parse the Expires header.
%%
%% Recipients must interpret invalid date formats as a date
%% in the past. The value "0" is commonly used. Both cases
%% produce the datetime {{1, 1, 1}, {0, 0, 0}}.

-spec parse_expires(binary()) -> calendar:datetime().
parse_expires(<<"0">>) ->
    {{1, 1, 1}, {0, 0, 0}};
parse_expires(Expires) ->
    %% Any failure while parsing the date is turned into the same
    %% "in the past" value as the literal "0".
    try cow_date:parse_date(Expires)
    catch _:_ -> {{1, 1, 1}, {0, 0, 0}}
    end.

-ifdef(TEST).
parse_expires_test_() ->
    Cases = [
        {<<"0">>, {{1, 1, 1}, {0, 0, 0}}},
        {<<"Thu, 01 Dec 1994 nope invalid">>, {{1, 1, 1}, {0, 0, 0}}},
        {<<"Thu, 01 Dec 1994 16:00:00 GMT">>, {{1994, 12, 1}, {16, 0, 0}}}
    ],
    [{Input, fun() -> Expected = parse_expires(Input) end}
        || {Input, Expected} <- Cases].

horse_parse_expires_0() ->
    horse:repeat(200000,
        parse_expires(<<"0">>)
    ).

horse_parse_expires_invalid() ->
    horse:repeat(200000,
        parse_expires(<<"Thu, 01 Dec 1994 nope invalid">>)
    ).
-endif.

%% @doc Parse the Host header.
%%
%% We only seek to have legal characters and separate the
%% host and port values. The number of segments in the host
%% or the size of each segment is not checked.
%%
%% There is no way to distinguish IPv4 addresses from regular
%% names until the last segment is reached therefore we do not
%% differentiate them.
%%
%% The following valid hosts are currently rejected: IPv6
%% addresses with a zone identifier; IPvFuture addresses;
%% and percent-encoded addresses.

-spec parse_host(binary()) -> {binary(), 0..65535 | undefined}.
parse_host(<< $[, Rest/bits >>) ->
    ipv6_address(Rest, << $[ >>);
parse_host(Host) ->
    reg_name(Host, <<>>).

%% Reads a bracketed IPv6 literal. The brackets are kept in the
%% returned host; an optional ":Port" may follow the closing bracket.
ipv6_address(<< $] >>, IP) ->
    {<< IP/binary, $] >>, undefined};
ipv6_address(<< $], $:, Port/bits >>, IP) ->
    {<< IP/binary, $] >>, binary_to_integer(Port)};
ipv6_address(<< C, Rest/bits >>, IP)
        when ?IS_HEX(C) or (C =:= $:) or (C =:= $.) ->
    ?LOWER(ipv6_address, Rest, IP).
%% Reads a registered name (or IPv4 address) up to the end of input or
%% up to ":", after which the remainder is converted to the port.
reg_name(<<>>, Name) ->
    {Name, undefined};
reg_name(<< $:, Port/bits >>, Name) ->
    {Name, binary_to_integer(Port)};
reg_name(<< C, Rest/bits >>, Name)
        when ?IS_URI_UNRESERVED(C) or ?IS_URI_SUB_DELIMS(C) ->
    ?LOWER(reg_name, Rest, Name).

-ifdef(TEST).
%% Characters used by the host generator below.
host_chars() -> "!$&'()*+,-.0123456789;=ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~".
host() -> vector(1, 255, elements(host_chars())).

%% Generator: {{LowercasedHost, Port}, SerializedHeaderValue}.
host_port() ->
    ?LET({Host, Port},
        {host(), oneof([undefined, int(1, 65535)])},
        begin
            HostBin = list_to_binary(Host),
            Serialized = case Port of
                undefined -> HostBin;
                _ -> << HostBin/binary, $:, (integer_to_binary(Port))/binary >>
            end,
            {{?LOWER(HostBin), Port}, Serialized}
        end).

prop_parse_host() ->
    ?FORALL({Res, Host}, host_port(), Res =:= parse_host(Host)).

parse_host_test_() ->
    Cases = [
        {<<>>, {<<>>, undefined}},
        {<<"www.example.org:8080">>, {<<"www.example.org">>, 8080}},
        {<<"www.example.org">>, {<<"www.example.org">>, undefined}},
        {<<"192.0.2.1:8080">>, {<<"192.0.2.1">>, 8080}},
        {<<"192.0.2.1">>, {<<"192.0.2.1">>, undefined}},
        {<<"[2001:db8::1]:8080">>, {<<"[2001:db8::1]">>, 8080}},
        {<<"[2001:db8::1]">>, {<<"[2001:db8::1]">>, undefined}},
        {<<"[::ffff:192.0.2.1]:8080">>, {<<"[::ffff:192.0.2.1]">>, 8080}},
        {<<"[::ffff:192.0.2.1]">>, {<<"[::ffff:192.0.2.1]">>, undefined}}
    ],
    [{Input, fun() -> Expected = parse_host(Input) end}
        || {Input, Expected} <- Cases].

horse_parse_host_blue_example_org() ->
    horse:repeat(200000,
        parse_host(<<"blue.example.org:8080">>)
    ).

horse_parse_host_ipv4() ->
    horse:repeat(200000,
        parse_host(<<"192.0.2.1:8080">>)
    ).

horse_parse_host_ipv6() ->
    horse:repeat(200000,
        parse_host(<<"[2001:db8::1]:8080">>)
    ).

horse_parse_host_ipv6_v4() ->
    horse:repeat(200000,
        parse_host(<<"[::ffff:192.0.2.1]:8080">>)
    ).
-endif.

%% @doc Parse the If-Match header.
%%
%% Returns the atom '*' for the wildcard value, otherwise a
%% list of entity-tags passed through nonempty/1.

-spec parse_if_match(binary()) -> '*' | [etag()].
parse_if_match(<<"*">>) ->
    '*';
parse_if_match(IfMatch) ->
    nonempty(etag_list(IfMatch, [])).
%% Start of a list element: skips whitespace and commas, then expects
%% either a weak (W/"...") or a strong ("...") entity-tag.
etag_list(<<>>, Acc) ->
    lists:reverse(Acc);
etag_list(<< C, Rest/bits >>, Acc) when ?IS_WS_COMMA(C) ->
    etag_list(Rest, Acc);
etag_list(<< $W, $/, $", Rest/bits >>, Acc) ->
    etag(Rest, Acc, weak, <<>>);
etag_list(<< $", Rest/bits >>, Acc) ->
    etag(Rest, Acc, strong, <<>>).

%% Reads the opaque tag of one list element up to its closing quote.
etag(<< $", Rest/bits >>, Acc, Strength, Tag) ->
    etag_list_sep(Rest, [{Strength, Tag}|Acc]);
etag(<< C, Rest/bits >>, Acc, Strength, Tag) when ?IS_ETAGC(C) ->
    etag(Rest, Acc, Strength, << Tag/binary, C >>).

%% After an element: only whitespace, a comma or the end of input
%% are accepted.
etag_list_sep(<<>>, Acc) ->
    lists:reverse(Acc);
etag_list_sep(<< C, Rest/bits >>, Acc) when ?IS_WS(C) ->
    etag_list_sep(Rest, Acc);
etag_list_sep(<< $,, Rest/bits >>, Acc) ->
    etag_list(Rest, Acc).

-ifdef(TEST).
%% Property: a comma-joined list of generated entity-tags parses back
%% to the same entity-tags, in order.
prop_parse_if_match() ->
    ?FORALL(Tags,
        non_empty(list(etag())),
        begin
            %% Every element is emitted with a leading comma; the
            %% first comma is dropped by the binary match.
            << _, IfMatch/binary >> = iolist_to_binary([[$,, T] || {_, T} <- Tags]),
            ResL = parse_if_match(IfMatch),
            CheckedL = [T =:= ResT || {{T, _}, ResT} <- lists:zip(Tags, ResL)],
            [true] =:= lists:usort(CheckedL)
        end).

parse_if_match_test_() ->
    Cases = [
        {<<"\"xyzzy\"">>, [{strong, <<"xyzzy">>}]},
        {<<"\"xyzzy\", \"r2d2xxxx\", \"c3piozzzz\"">>,
            [{strong, <<"xyzzy">>}, {strong, <<"r2d2xxxx">>}, {strong, <<"c3piozzzz">>}]},
        {<<"*">>, '*'}
    ],
    [{Input, fun() -> Expected = parse_if_match(Input) end}
        || {Input, Expected} <- Cases].

parse_if_match_error_test_() ->
    Cases = [
        <<>>
    ],
    [{Input, fun() -> {'EXIT', _} = (catch parse_if_match(Input)) end}
        || Input <- Cases].

horse_parse_if_match() ->
    horse:repeat(200000,
        parse_if_match(<<"\"xyzzy\", \"r2d2xxxx\", \"c3piozzzz\"">>)
    ).
-endif.

%% @doc Parse the If-Modified-Since header.
%%
%% Thin wrapper that delegates to cow_date:parse_date/1.

-spec parse_if_modified_since(binary()) -> calendar:datetime().
parse_if_modified_since(IfModifiedSince) ->
    cow_date:parse_date(IfModifiedSince).

-ifdef(TEST).
parse_if_modified_since_test_() ->
    Cases = [
        {<<"Sat, 29 Oct 1994 19:43:31 GMT">>, {{1994, 10, 29}, {19, 43, 31}}}
    ],
    [{Input, fun() -> Expected = parse_if_modified_since(Input) end}
        || {Input, Expected} <- Cases].
-endif.

%% @doc Parse the If-None-Match header.
-spec parse_if_none_match(binary()) -> '*' | [etag()].
parse_if_none_match(<<"*">>) ->
    '*';
parse_if_none_match(IfNoneMatch) ->
    nonempty(etag_list(IfNoneMatch, [])).

-ifdef(TEST).
parse_if_none_match_test_() ->
    Cases = [
        {<<"\"xyzzy\"">>, [{strong, <<"xyzzy">>}]},
        {<<"W/\"xyzzy\"">>, [{weak, <<"xyzzy">>}]},
        {<<"\"xyzzy\", \"r2d2xxxx\", \"c3piozzzz\"">>,
            [{strong, <<"xyzzy">>}, {strong, <<"r2d2xxxx">>}, {strong, <<"c3piozzzz">>}]},
        {<<"W/\"xyzzy\", W/\"r2d2xxxx\", W/\"c3piozzzz\"">>,
            [{weak, <<"xyzzy">>}, {weak, <<"r2d2xxxx">>}, {weak, <<"c3piozzzz">>}]},
        {<<"*">>, '*'}
    ],
    [{Input, fun() -> Expected = parse_if_none_match(Input) end}
        || {Input, Expected} <- Cases].

parse_if_none_match_error_test_() ->
    Cases = [
        <<>>
    ],
    [{Input, fun() -> {'EXIT', _} = (catch parse_if_none_match(Input)) end}
        || Input <- Cases].

horse_parse_if_none_match() ->
    horse:repeat(200000,
        parse_if_none_match(<<"W/\"xyzzy\", W/\"r2d2xxxx\", W/\"c3piozzzz\"">>)
    ).
-endif.

%% @doc Parse the If-Range header.
%%
%% A value starting with W/" or " is read as a single entity-tag;
%% anything else is handed to cow_date:parse_date/1.

-spec parse_if_range(binary()) -> etag() | calendar:datetime().
parse_if_range(<< $W, $/, $", Rest/bits >>) ->
    etag(Rest, weak, <<>>);
parse_if_range(<< $", Rest/bits >>) ->
    etag(Rest, strong, <<>>);
parse_if_range(IfRange) ->
    cow_date:parse_date(IfRange).

-ifdef(TEST).
parse_if_range_test_() ->
    Cases = [
        {<<"W/\"xyzzy\"">>, {weak, <<"xyzzy">>}},
        {<<"\"xyzzy\"">>, {strong, <<"xyzzy">>}},
        {<<"Sat, 29 Oct 1994 19:43:31 GMT">>, {{1994, 10, 29}, {19, 43, 31}}}
    ],
    [{Input, fun() -> Expected = parse_if_range(Input) end}
        || {Input, Expected} <- Cases].

parse_if_range_error_test_() ->
    Cases = [
        <<>>
    ],
    [{Input, fun() -> {'EXIT', _} = (catch parse_if_range(Input)) end}
        || Input <- Cases].

horse_parse_if_range_etag() ->
    horse:repeat(200000,
        parse_if_range(<<"\"xyzzy\"">>)
    ).

horse_parse_if_range_date() ->
    horse:repeat(200000,
        parse_if_range(<<"Sat, 29 Oct 1994 19:43:31 GMT">>)
    ).
-endif.

%% @doc Parse the If-Unmodified-Since header.
-spec parse_if_unmodified_since(binary()) -> calendar:datetime().
parse_if_unmodified_since(IfUnmodifiedSince) ->
    cow_date:parse_date(IfUnmodifiedSince).

-ifdef(TEST).
parse_if_unmodified_since_test_() ->
    Cases = [
        {<<"Sat, 29 Oct 1994 19:43:31 GMT">>, {{1994, 10, 29}, {19, 43, 31}}}
    ],
    [{Input, fun() -> Expected = parse_if_unmodified_since(Input) end}
        || {Input, Expected} <- Cases].
-endif.

%% @doc Parse the Last-Modified header.
%%
%% Thin wrapper that delegates to cow_date:parse_date/1.

-spec parse_last_modified(binary()) -> calendar:datetime().
parse_last_modified(LastModified) ->
    cow_date:parse_date(LastModified).

-ifdef(TEST).
parse_last_modified_test_() ->
    Cases = [
        {<<"Tue, 15 Nov 1994 12:45:26 GMT">>, {{1994, 11, 15}, {12, 45, 26}}}
    ],
    [{Input, fun() -> Expected = parse_last_modified(Input) end}
        || {Input, Expected} <- Cases].
-endif.

%% @doc Parse the Max-Forwards header.
%%
%% The value is converted with binary_to_integer/1 and must not be
%% negative; otherwise the call crashes.

-spec parse_max_forwards(binary()) -> non_neg_integer().
parse_max_forwards(MaxForwards) ->
    Count = binary_to_integer(MaxForwards),
    %% Assertion: a negative count fails the match.
    true = Count >= 0,
    Count.

-ifdef(TEST).
prop_parse_max_forwards() ->
    ?FORALL(
        N,
        non_neg_integer(),
        N =:= parse_max_forwards(integer_to_binary(N))
    ).

parse_max_forwards_test_() ->
    Cases = [
        {<<"0">>, 0},
        {<<"42">>, 42},
        {<<"69">>, 69},
        {<<"1337">>, 1337},
        {<<"1234567890">>, 1234567890}
    ],
    [{Input, fun() -> Expected = parse_max_forwards(Input) end}
        || {Input, Expected} <- Cases].

parse_max_forwards_error_test_() ->
    Cases = [
        <<>>,
        <<"123, 123">>,
        <<"4.17">>
    ],
    [{Input, fun() -> {'EXIT', _} = (catch parse_max_forwards(Input)) end}
        || Input <- Cases].
-endif.

%% @doc Parse the Pragma header.
%%
%% Legacy header kept for backward compatibility with HTTP/1.0 caches.
%% Only the "no-cache" directive was ever specified, and only for
%% request messages.
%%
%% We take a large shortcut in the parsing of this header, expecting
%% an exact match of "no-cache". Every other value yields cache.

-spec parse_pragma(binary()) -> cache | no_cache.
parse_pragma(<<"no-cache">>) ->
    no_cache;
parse_pragma(_) ->
    cache.
%% @doc Parse the Proxy-Authenticate header.
%%
%% Alias of parse_www_authenticate/1 due to identical syntax.

-spec parse_proxy_authenticate(binary()) -> [{basic, binary()}
    | {bearer | digest | binary(), [{binary(), binary()}]}].
parse_proxy_authenticate(ProxyAuthenticate) ->
    parse_www_authenticate(ProxyAuthenticate).

%% @doc Parse the Proxy-Authorization header.
%%
%% Alias of parse_authorization/1 due to identical syntax.

-spec parse_proxy_authorization(binary())
    -> {basic, binary(), binary()}
    | {bearer, binary()}
    | {digest, [{binary(), binary()}]}.
parse_proxy_authorization(ProxyAuthorization) ->
    parse_authorization(ProxyAuthorization).

%% @doc Parse the Range header.
%%
%% For the "bytes" unit the result is {bytes, Specs} where each spec
%% is {First, Last}, {First, infinity} or a negative integer for a
%% suffix range. For any other unit the range set is returned
%% unparsed as {Unit, Set}.

-spec parse_range(binary())
    -> {bytes, [{non_neg_integer(), non_neg_integer() | infinity} | neg_integer()]}
    | {binary(), binary()}.
parse_range(<<"bytes=", Rest/bits >>) ->
    bytes_range_set(Rest, []);
parse_range(<< C, Rest/bits >>) when ?IS_TOKEN(C) ->
    ?LOWER(other_range_unit, Rest, <<>>).

%% Start of one byte-range-spec: skips whitespace and commas, then
%% expects either "-Digits" (suffix range) or "Digits" (first position).
bytes_range_set(<<>>, Acc) ->
    {bytes, lists:reverse(Acc)};
bytes_range_set(<< C, Rest/bits >>, Acc) when ?IS_WS_COMMA(C) ->
    bytes_range_set(Rest, Acc);
bytes_range_set(<< $-, C, Rest/bits >>, Acc) when ?IS_DIGIT(C) ->
    bytes_range_suffix_spec(Rest, Acc, C - $0);
bytes_range_set(<< C, Rest/bits >>, Acc) when ?IS_DIGIT(C) ->
    bytes_range_spec(Rest, Acc, C - $0).

%% Accumulates the first position; "-" followed by a digit begins the
%% last position, "-" alone means the range is open-ended.
bytes_range_spec(<< $-, C, Rest/bits >>, Acc, First) when ?IS_DIGIT(C) ->
    bytes_range_spec_last(Rest, Acc, First, C - $0);
bytes_range_spec(<< $-, Rest/bits >>, Acc, First) ->
    bytes_range_set_sep(Rest, [{First, infinity}|Acc]);
bytes_range_spec(<< C, Rest/bits >>, Acc, First) when ?IS_DIGIT(C) ->
    bytes_range_spec(Rest, Acc, First * 10 + C - $0).

%% Accumulates the last position of a {First, Last} range.
bytes_range_spec_last(<< C, Rest/bits >>, Acc, First, Last) when ?IS_DIGIT(C) ->
    bytes_range_spec_last(Rest, Acc, First, Last * 10 + C - $0);
bytes_range_spec_last(Rest, Acc, First, Last) ->
    bytes_range_set_sep(Rest, [{First, Last}|Acc]).
%% Accumulates the length of a suffix range; it is stored negated.
bytes_range_suffix_spec(<< C, Rest/bits >>, Acc, Suffix) when ?IS_DIGIT(C) ->
    bytes_range_suffix_spec(Rest, Acc, Suffix * 10 + C - $0);
bytes_range_suffix_spec(Rest, Acc, Suffix) ->
    bytes_range_set_sep(Rest, [-Suffix|Acc]).

%% After a byte-range-spec: only whitespace, a comma or the end of
%% input are accepted.
bytes_range_set_sep(<<>>, Acc) ->
    {bytes, lists:reverse(Acc)};
bytes_range_set_sep(<< C, Rest/bits >>, Acc) when ?IS_WS(C) ->
    bytes_range_set_sep(Rest, Acc);
bytes_range_set_sep(<< $,, Rest/bits >>, Acc) ->
    bytes_range_set(Rest, Acc).

%% Reads the unit name of a non-bytes range up to "=". At least one
%% visible character must follow the "=".
other_range_unit(<< $=, C, Rest/bits >>, Unit) when ?IS_VCHAR(C) ->
    other_range_set(Rest, Unit, << C >>);
other_range_unit(<< C, Rest/bits >>, Unit) when ?IS_TOKEN(C) ->
    ?LOWER(other_range_unit, Rest, Unit).

%% Copies the range set of a non-bytes range verbatim.
other_range_set(<<>>, Unit, Set) ->
    {Unit, Set};
other_range_set(<< C, Rest/bits >>, Unit, Set) when ?IS_VCHAR(C) ->
    other_range_set(Rest, Unit, << Set/binary, C >>).

-ifdef(TEST).
%% Generator: {{bytes, Specs}, SerializedHeaderValue}.
bytes_range() ->
    ?LET(BytesSet,
        non_empty(list(oneof([
            ?SUCHTHAT({First, Last}, {pos_integer(), pos_integer()}, First =< Last),
            {pos_integer(), infinity},
            ?LET(I, pos_integer(), -I)
        ]))),
        {{bytes, BytesSet}, begin
            %% Every spec is emitted with a leading comma; the first
            %% comma is dropped by the binary match.
            << _, Set/bits >> = iolist_to_binary([
                case Spec of
                    {First, infinity} -> [$,, integer_to_binary(First), $-];
                    {First, Last} -> [$,, integer_to_binary(First), $-, integer_to_binary(Last)];
                    Suffix -> [$,, integer_to_binary(Suffix)]
                end || Spec <- BytesSet]),
            <<"bytes=", Set/binary >>
        end}).

%% Generator: {{Unit, Set}, SerializedHeaderValue} for a custom unit.
other_range() ->
    ?LET(Range = {Unit, Set},
        {token(), ?LET(L, non_empty(list(vchar())), list_to_binary(L))},
        {Range, << Unit/binary, $=, Set/binary >>}).

range() ->
    oneof([
        bytes_range(),
        other_range()
    ]).

%% Property: generated ranges parse back to themselves, with the unit
%% name of custom ranges lowercased.
prop_parse_range() ->
    ?FORALL({Range, RangeBin},
        range(),
        begin
            Expected = case Range of
                {bytes, _} -> Range;
                {Unit, Set} -> {?LOWER(Unit), Set}
            end,
            Expected =:= parse_range(RangeBin)
        end).
parse_range_test_() ->
    Cases = [
        {<<"bytes=0-499">>, {bytes, [{0, 499}]}},
        {<<"bytes=500-999">>, {bytes, [{500, 999}]}},
        {<<"bytes=-500">>, {bytes, [-500]}},
        {<<"bytes=9500-">>, {bytes, [{9500, infinity}]}},
        {<<"bytes=0-0,-1">>, {bytes, [{0, 0}, -1]}},
        {<<"bytes=500-600,601-999">>, {bytes, [{500, 600}, {601, 999}]}},
        {<<"bytes=500-700,601-999">>, {bytes, [{500, 700}, {601, 999}]}},
        {<<"books=I-III,V-IX">>, {<<"books">>, <<"I-III,V-IX">>}}
    ],
    [{Input, fun() -> Expected = parse_range(Input) end}
        || {Input, Expected} <- Cases].

parse_range_error_test_() ->
    Cases = [
        <<>>
    ],
    [{Input, fun() -> {'EXIT', _} = (catch parse_range(Input)) end}
        || Input <- Cases].

horse_parse_range_first_last() ->
    horse:repeat(200000,
        parse_range(<<"bytes=500-999">>)
    ).

horse_parse_range_infinity() ->
    horse:repeat(200000,
        parse_range(<<"bytes=9500-">>)
    ).

horse_parse_range_suffix() ->
    horse:repeat(200000,
        parse_range(<<"bytes=-500">>)
    ).

horse_parse_range_two() ->
    horse:repeat(200000,
        parse_range(<<"bytes=500-700,601-999">>)
    ).

horse_parse_range_other() ->
    horse:repeat(200000,
        parse_range(<<"books=I-III,V-IX">>)
    ).
-endif.

%% @doc Parse the Retry-After header.
%%
%% A value whose first byte is a digit is read as a number of
%% seconds; anything else is handed to cow_date:parse_date/1.

-spec parse_retry_after(binary()) -> non_neg_integer() | calendar:datetime().
parse_retry_after(RetryAfter = << D, _/bits >>) when ?IS_DIGIT(D) ->
    Seconds = binary_to_integer(RetryAfter),
    %% Assertion: a negative delay fails the match.
    true = Seconds >= 0,
    Seconds;
parse_retry_after(RetryAfter) ->
    cow_date:parse_date(RetryAfter).

-ifdef(TEST).
parse_retry_after_test_() ->
    Cases = [
        {<<"Fri, 31 Dec 1999 23:59:59 GMT">>, {{1999, 12, 31}, {23, 59, 59}}},
        {<<"120">>, 120}
    ],
    [{Input, fun() -> Expected = parse_retry_after(Input) end}
        || {Input, Expected} <- Cases].

parse_retry_after_error_test_() ->
    Cases = [
        <<>>
    ],
    [{Input, fun() -> {'EXIT', _} = (catch parse_retry_after(Input)) end}
        || Input <- Cases].

horse_parse_retry_after_date() ->
    horse:repeat(200000,
        parse_retry_after(<<"Fri, 31 Dec 1999 23:59:59 GMT">>)
    ).
horse_parse_retry_after_delay_seconds() ->
    horse:repeat(200000,
        parse_retry_after(<<"120">>)
    ).
-endif.

%% @doc Dummy parsing function for the Sec-WebSocket-Accept header.
%%
%% The argument is returned without any processing. This value is
%% expected to be matched directly by the client so no parsing is
%% needed.

-spec parse_sec_websocket_accept(binary()) -> binary().
parse_sec_websocket_accept(SecWebSocketAccept) ->
    SecWebSocketAccept.

%% @doc Parse the Sec-WebSocket-Extensions request header.
%%
%% Each extension is returned as {Name, Params} where a parameter is
%% either a bare name or a {Name, Value} pair.

-spec parse_sec_websocket_extensions(binary()) -> [{binary(), [binary() | {binary(), binary()}]}].
parse_sec_websocket_extensions(SecWebSocketExtensions) ->
    nonempty(ws_extension_list(SecWebSocketExtensions, [])).

%% Start of one extension: skips whitespace and commas, then expects
%% the first token character of the extension name.
ws_extension_list(<<>>, Acc) ->
    lists:reverse(Acc);
ws_extension_list(<< C, Rest/bits >>, Acc) when ?IS_WS_COMMA(C) ->
    ws_extension_list(Rest, Acc);
ws_extension_list(<< C, Rest/bits >>, Acc) when ?IS_TOKEN(C) ->
    ws_extension(Rest, Acc, << C >>).

%% Accumulates the extension name; the name is kept as-is.
ws_extension(<< C, Rest/bits >>, Acc, Ext) when ?IS_TOKEN(C) ->
    ws_extension(Rest, Acc, << Ext/binary, C >>);
ws_extension(Rest, Acc, Ext) ->
    ws_extension_param_sep(Rest, Acc, Ext, []).

%% After an extension name or a parameter: the end of input or a comma
%% closes the extension, ";" introduces another parameter.
ws_extension_param_sep(<<>>, Acc, Ext, Params) ->
    lists:reverse([{Ext, lists:reverse(Params)}|Acc]);
ws_extension_param_sep(<< $,, Rest/bits >>, Acc, Ext, Params) ->
    ws_extension_list(Rest, [{Ext, lists:reverse(Params)}|Acc]);
ws_extension_param_sep(<< $;, Rest/bits >>, Acc, Ext, Params) ->
    ws_extension_before_param(Rest, Acc, Ext, Params);
ws_extension_param_sep(<< C, Rest/bits >>, Acc, Ext, Params) when ?IS_WS(C) ->
    ws_extension_param_sep(Rest, Acc, Ext, Params).

%% Skips whitespace between ";" and the parameter name.
ws_extension_before_param(<< C, Rest/bits >>, Acc, Ext, Params) when ?IS_WS(C) ->
    ws_extension_before_param(Rest, Acc, Ext, Params);
ws_extension_before_param(<< C, Rest/bits >>, Acc, Ext, Params) when ?IS_TOKEN(C) ->
    ws_extension_param(Rest, Acc, Ext, Params, << C >>).
%% Accumulates a parameter name. "=" starts a quoted or token value;
%% any other non-token character ends a value-less parameter, which is
%% stored as the bare name.
ws_extension_param(<< $=, $", Rest/bits >>, Acc, Ext, Params, Key) ->
    ws_extension_quoted(Rest, Acc, Ext, Params, Key, <<>>);
ws_extension_param(<< $=, C, Rest/bits >>, Acc, Ext, Params, Key) when ?IS_TOKEN(C) ->
    ws_extension_value(Rest, Acc, Ext, Params, Key, << C >>);
ws_extension_param(<< C, Rest/bits >>, Acc, Ext, Params, Key) when ?IS_TOKEN(C) ->
    ws_extension_param(Rest, Acc, Ext, Params, << Key/binary, C >>);
ws_extension_param(Rest, Acc, Ext, Params, Key) ->
    ws_extension_param_sep(Rest, Acc, Ext, [Key|Params]).

%% Reads a quoted value. Only token characters are accepted inside the
%% quotes, optionally preceded by a backslash.
ws_extension_quoted(<< $", Rest/bits >>, Acc, Ext, Params, Key, Value) ->
    ws_extension_param_sep(Rest, Acc, Ext, [{Key, Value}|Params]);
ws_extension_quoted(<< $\\, C, Rest/bits >>, Acc, Ext, Params, Key, Value) when ?IS_TOKEN(C) ->
    ws_extension_quoted(Rest, Acc, Ext, Params, Key, << Value/binary, C >>);
ws_extension_quoted(<< C, Rest/bits >>, Acc, Ext, Params, Key, Value) when ?IS_TOKEN(C) ->
    ws_extension_quoted(Rest, Acc, Ext, Params, Key, << Value/binary, C >>).

%% Reads an unquoted token value.
ws_extension_value(<< C, Rest/bits >>, Acc, Ext, Params, Key, Value) when ?IS_TOKEN(C) ->
    ws_extension_value(Rest, Acc, Ext, Params, Key, << Value/binary, C >>);
ws_extension_value(Rest, Acc, Ext, Params, Key, Value) ->
    ws_extension_param_sep(Rest, Acc, Ext, [{Key, Value}|Params]).

-ifdef(TEST).
%% Generator: a double-quoted token in which a character is
%% occasionally preceded by a backslash.
quoted_token() ->
    ?LET(Chars,
        non_empty(list(frequency([
            {99, tchar()},
            {1, [$\\, tchar()]}
        ]))),
        [$", Chars, $"]).

%% Generator: {Name, ParamSpecs, SerializedExtension}.
ws_extension() ->
    ?LET({Ext, ParamSpecs},
        {token(), small_list({ows(), ows(), oneof([token(), {token(), oneof([token(), quoted_token()])}])})},
        {Ext, ParamSpecs, iolist_to_binary([Ext,
            [case Spec of
                {OWS1, OWS2, {K, V}} -> [OWS1, $;, OWS2, K, $=, V];
                {OWS1, OWS2, K} -> [OWS1, $;, OWS2, K]
            end || Spec <- ParamSpecs]
        ])}).

%% Property: a comma-joined list of generated extensions parses back
%% to the same names and parameters, with quoted values unquoted.
prop_parse_sec_websocket_extensions() ->
    ?FORALL(Exts,
        vector(1, 50, ws_extension()),
        begin
            %% Every extension is emitted with a leading comma; the
            %% first comma is dropped by the binary match.
            << _, SecWebsocketExtensions/binary >> = iolist_to_binary([[$,, E] || {_, _, E} <- Exts]),
            ResL = parse_sec_websocket_extensions(SecWebsocketExtensions),
            CheckedL = [begin
                ExpectedPL = [case P of
                    {_, _, {K, V}} -> {K, unquote(V)};
                    {_, _, K} -> K
                end || P <- PL],
                E =:= ResE andalso ExpectedPL =:= ResPL
            end || {{E, PL, _}, {ResE, ResPL}} <- lists:zip(Exts, ResL)],
            [true] =:= lists:usort(CheckedL)
        end).
parse_sec_websocket_extensions_test_() ->
    Cases = [
        {<<"foo">>, [{<<"foo">>, []}]},
        {<<"bar; baz=2">>, [{<<"bar">>, [{<<"baz">>, <<"2">>}]}]},
        {<<"foo, bar; baz=2">>, [{<<"foo">>, []}, {<<"bar">>, [{<<"baz">>, <<"2">>}]}]},
        {<<"deflate-stream">>, [{<<"deflate-stream">>, []}]},
        {<<"mux; max-channels=4; flow-control, deflate-stream">>,
            [{<<"mux">>, [{<<"max-channels">>, <<"4">>}, <<"flow-control">>]}, {<<"deflate-stream">>, []}]},
        {<<"private-extension">>, [{<<"private-extension">>, []}]}
    ],
    [{Input, fun() -> Expected = parse_sec_websocket_extensions(Input) end}
        || {Input, Expected} <- Cases].

parse_sec_websocket_extensions_error_test_() ->
    Cases = [
        <<>>
    ],
    [{Input, fun() -> {'EXIT', _} = (catch parse_sec_websocket_extensions(Input)) end}
        || Input <- Cases].

horse_parse_sec_websocket_extensions() ->
    horse:repeat(200000,
        parse_sec_websocket_extensions(<<"mux; max-channels=4; flow-control, deflate-stream">>)
    ).
-endif.

%% @doc Dummy parsing function for the Sec-WebSocket-Key header.
%%
%% The argument is returned without any processing. This value is
%% expected to be prepended to a static value, the result of which
%% hashed to form a new base64 value returned in Sec-WebSocket-Accept,
%% therefore no parsing is needed.

-spec parse_sec_websocket_key(binary()) -> binary().
parse_sec_websocket_key(SecWebSocketKey) ->
    SecWebSocketKey.

%% @doc Parse the Sec-WebSocket-Protocol request header.
%%
%% Delegates to token_ci_list/2 and passes the result through
%% nonempty/1.

-spec parse_sec_websocket_protocol_req(binary()) -> [binary()].
parse_sec_websocket_protocol_req(SecWebSocketProtocol) ->
    nonempty(token_ci_list(SecWebSocketProtocol, [])).

-ifdef(TEST).
parse_sec_websocket_protocol_req_test_() ->
    Cases = [
        {<<"chat, superchat">>, [<<"chat">>, <<"superchat">>]}
    ],
    [{Input, fun() -> Expected = parse_sec_websocket_protocol_req(Input) end}
        || {Input, Expected} <- Cases].
parse_sec_websocket_protocol_req_error_test_() ->
    Cases = [
        <<>>
    ],
    [{Input, fun() -> {'EXIT', _} = (catch parse_sec_websocket_protocol_req(Input)) end}
        || Input <- Cases].

horse_parse_sec_websocket_protocol_req() ->
    horse:repeat(200000,
        parse_sec_websocket_protocol_req(<<"chat, superchat">>)
    ).
-endif.

%% @doc Parse the Sec-Websocket-Protocol response header.
%%
%% The value must be a single non-empty token; it is returned
%% lowercased.

-spec parse_sec_websocket_protocol_resp(binary()) -> binary().
parse_sec_websocket_protocol_resp(<< C, Rest/bits >>) when ?IS_TOKEN(C) ->
    ?LOWER(token_ci, Rest, <<>>).

%% Accumulates the remaining token characters up to the end of input.
token_ci(<<>>, Token) ->
    Token;
token_ci(<< C, Rest/bits >>, Token) when ?IS_TOKEN(C) ->
    ?LOWER(token_ci, Rest, Token).

-ifdef(TEST).
prop_parse_sec_websocket_protocol_resp() ->
    ?FORALL(Token,
        token(),
        ?LOWER(Token) =:= parse_sec_websocket_protocol_resp(Token)).

parse_sec_websocket_protocol_resp_test_() ->
    Cases = [
        {<<"chat">>, <<"chat">>},
        {<<"CHAT">>, <<"chat">>}
    ],
    [{Input, fun() -> Expected = parse_sec_websocket_protocol_resp(Input) end}
        || {Input, Expected} <- Cases].

parse_sec_websocket_protocol_resp_error_test_() ->
    Cases = [
        <<>>
    ],
    [{Input, fun() -> {'EXIT', _} = (catch parse_sec_websocket_protocol_resp(Input)) end}
        || Input <- Cases].

horse_parse_sec_websocket_protocol_resp() ->
    horse:repeat(200000,
        parse_sec_websocket_protocol_resp(<<"chat">>)
    ).
-endif.

%% @doc Parse the Sec-WebSocket-Version request header.
%%
%% Only values shorter than four bytes are considered, and the
%% resulting integer must be within 0..255.

-spec parse_sec_websocket_version_req(binary()) -> websocket_version().
parse_sec_websocket_version_req(SecWebSocketVersion)
        when byte_size(SecWebSocketVersion) < 4 ->
    Version = binary_to_integer(SecWebSocketVersion),
    %% Assertion: an out-of-range version fails the match.
    true = Version >= 0 andalso Version =< 255,
    Version.

-ifdef(TEST).
prop_parse_sec_websocket_version_req() ->
    ?FORALL(Version,
        int(0, 255),
        Version =:= parse_sec_websocket_version_req(integer_to_binary(Version))).

parse_sec_websocket_version_req_test_() ->
    Cases = [
        {<<"13">>, 13},
        {<<"25">>, 25}
    ],
    [{Input, fun() -> Expected = parse_sec_websocket_version_req(Input) end}
        || {Input, Expected} <- Cases].
parse_sec_websocket_version_req_error_test_() ->
    Cases = [
        <<>>,
        <<" ">>,
        <<"7, 8, 13">>,
        <<"invalid">>
    ],
    [{Input, fun() -> {'EXIT', _} = (catch parse_sec_websocket_version_req(Input)) end}
        || Input <- Cases].

horse_parse_sec_websocket_version_req_13() ->
    horse:repeat(200000,
        parse_sec_websocket_version_req(<<"13">>)
    ).

horse_parse_sec_websocket_version_req_255() ->
    horse:repeat(200000,
        parse_sec_websocket_version_req(<<"255">>)
    ).
-endif.

%% @doc Parse the Sec-WebSocket-Version response header.
%%
%% The value is a comma-separated list of integers; the result is
%% passed through nonempty/1.

-spec parse_sec_websocket_version_resp(binary()) -> [websocket_version()].
parse_sec_websocket_version_resp(SecWebSocketVersion) ->
    nonempty(ws_version_list(SecWebSocketVersion, [])).

%% Start of one version: skips whitespace and commas, then expects
%% the first digit.
ws_version_list(<<>>, Acc) ->
    lists:reverse(Acc);
ws_version_list(<< C, Rest/bits >>, Acc) when ?IS_WS_COMMA(C) ->
    ws_version_list(Rest, Acc);
ws_version_list(<< C, Rest/bits >>, Acc) when ?IS_DIGIT(C) ->
    ws_version(Rest, Acc, C - $0).

%% Accumulates the decimal digits of one version number.
ws_version(<< C, Rest/bits >>, Acc, Version) when ?IS_DIGIT(C) ->
    ws_version(Rest, Acc, Version * 10 + C - $0);
ws_version(Rest, Acc, Version) ->
    ws_version_list_sep(Rest, [Version|Acc]).

%% After a version: only whitespace, a comma or the end of input
%% are accepted.
ws_version_list_sep(<<>>, Acc) ->
    lists:reverse(Acc);
ws_version_list_sep(<< C, Rest/bits >>, Acc) when ?IS_WS(C) ->
    ws_version_list_sep(Rest, Acc);
ws_version_list_sep(<< $,, Rest/bits >>, Acc) ->
    ws_version_list(Rest, Acc).

-ifdef(TEST).
%% Generator: {Versions, SerializedHeaderValue}.
sec_websocket_version_resp() ->
    ?LET(Specs,
        non_empty(list({ows(), ows(), int(0, 255)})),
        begin
            %% The leading byte of the joined list is dropped by the
            %% binary match.
            << _, SecWebSocketVersion/binary >> = iolist_to_binary(
                [[OWS1, $,, OWS2, integer_to_binary(V)] || {OWS1, OWS2, V} <- Specs]),
            {[V || {_, _, V} <- Specs], SecWebSocketVersion}
        end).

prop_parse_sec_websocket_version_resp() ->
    ?FORALL({Versions, SecWebSocketVersion},
        sec_websocket_version_resp(),
        Versions =:= parse_sec_websocket_version_resp(SecWebSocketVersion)).

parse_sec_websocket_version_resp_test_() ->
    Cases = [
        {<<"13, 8, 7">>, [13, 8, 7]}
    ],
    [{Input, fun() -> Expected = parse_sec_websocket_version_resp(Input) end}
        || {Input, Expected} <- Cases].
parse_sec_websocket_version_resp_error_test_() ->
    Cases = [
        <<>>
    ],
    [{Input, fun() -> {'EXIT', _} = (catch parse_sec_websocket_version_resp(Input)) end}
        || Input <- Cases].

horse_parse_sec_websocket_version_resp() ->
    horse:repeat(200000,
        parse_sec_websocket_version_resp(<<"13, 8, 7">>)
    ).
-endif.

%% @doc Parse the TE header.
%%
%% This function does not support parsing of transfer-parameter.
%%
%% The result is {trailers | no_trailers, Codings} where "trailers"
%% itself is never part of Codings and each coding carries a weight
%% (1000 when no q-value is given).

-spec parse_te(binary()) -> {trailers | no_trailers, [{binary(), qvalue()}]}.
parse_te(TE) ->
    te_list(TE, no_trailers, []).

%% Start of one list element. The well-known lowercase names are
%% matched as whole prefixes; anything else goes through ?LOWER one
%% character at a time.
te_list(<<>>, Trail, Acc) ->
    {Trail, lists:reverse(Acc)};
te_list(<< C, Rest/bits >>, Trail, Acc) when ?IS_WS_COMMA(C) ->
    te_list(Rest, Trail, Acc);
te_list(<< "trailers", Rest/bits >>, Trail, Acc) ->
    te(Rest, Trail, Acc, <<"trailers">>);
te_list(<< "compress", Rest/bits >>, Trail, Acc) ->
    te(Rest, Trail, Acc, <<"compress">>);
te_list(<< "deflate", Rest/bits >>, Trail, Acc) ->
    te(Rest, Trail, Acc, <<"deflate">>);
te_list(<< "gzip", Rest/bits >>, Trail, Acc) ->
    te(Rest, Trail, Acc, <<"gzip">>);
te_list(<< C, Rest/bits >>, Trail, Acc) when ?IS_TOKEN(C) ->
    ?LOWER(te, Rest, Trail, Acc, <<>>).

%% Accumulates a coding name. A completed "trailers" only flips the
%% flag and is not added to the list; ";" (never allowed after
%% "trailers") introduces a weight.
te(<<>>, _, Acc, <<"trailers">>) ->
    {trailers, lists:reverse(Acc)};
te(<< $,, Rest/bits >>, _, Acc, <<"trailers">>) ->
    te_list(Rest, trailers, Acc);
te(<< $;, Rest/bits >>, Trail, Acc, Name) when Name =/= <<"trailers">> ->
    te_before_weight(Rest, Trail, Acc, Name);
te(<< C, Rest/bits >>, _, Acc, <<"trailers">>) when ?IS_WS(C) ->
    te_list_sep(Rest, trailers, Acc);
te(<< C, Rest/bits >>, Trail, Acc, Name) when ?IS_TOKEN(C) ->
    ?LOWER(te, Rest, Trail, Acc, Name);
te(Rest, Trail, Acc, Name) ->
    te_param_sep(Rest, Trail, Acc, Name).

%% After a coding name without weight: the coding gets the default
%% weight of 1000.
te_param_sep(<<>>, Trail, Acc, Name) ->
    {Trail, lists:reverse([{Name, 1000}|Acc])};
te_param_sep(<< $,, Rest/bits >>, Trail, Acc, Name) ->
    te_list(Rest, Trail, [{Name, 1000}|Acc]);
te_param_sep(<< C, Rest/bits >>, Trail, Acc, Name) when ?IS_WS(C) ->
    te_param_sep(Rest, Trail, Acc, Name).
%% Skips whitespace after ";" and expects the literal "q=".
te_before_weight(<< C, Rest/bits >>, Trail, Acc, Name) when ?IS_WS(C) ->
    te_before_weight(Rest, Trail, Acc, Name);
te_before_weight(<< $q, $=, Rest/bits >>, Trail, Acc, Name) ->
    te_weight(Rest, Trail, Acc, Name).

%% Converts a q-value into an integer in 0..1000. Clauses are ordered
%% from the longest spelling to the shortest so that the most specific
%% prefix wins.
te_weight(<< "1.000", Rest/bits >>, Trail, Acc, Name) ->
    te_list_sep(Rest, Trail, [{Name, 1000}|Acc]);
te_weight(<< "1.00", Rest/bits >>, Trail, Acc, Name) ->
    te_list_sep(Rest, Trail, [{Name, 1000}|Acc]);
te_weight(<< "1.0", Rest/bits >>, Trail, Acc, Name) ->
    te_list_sep(Rest, Trail, [{Name, 1000}|Acc]);
te_weight(<< "1.", Rest/bits >>, Trail, Acc, Name) ->
    te_list_sep(Rest, Trail, [{Name, 1000}|Acc]);
te_weight(<< "1", Rest/bits >>, Trail, Acc, Name) ->
    te_list_sep(Rest, Trail, [{Name, 1000}|Acc]);
te_weight(<< "0.", A, B, C, Rest/bits >>, Trail, Acc, Name)
        when ?IS_DIGIT(A), ?IS_DIGIT(B), ?IS_DIGIT(C) ->
    te_list_sep(Rest, Trail, [{Name, (A - $0) * 100 + (B - $0) * 10 + (C - $0)}|Acc]);
te_weight(<< "0.", A, B, Rest/bits >>, Trail, Acc, Name)
        when ?IS_DIGIT(A), ?IS_DIGIT(B) ->
    te_list_sep(Rest, Trail, [{Name, (A - $0) * 100 + (B - $0) * 10}|Acc]);
te_weight(<< "0.", A, Rest/bits >>, Trail, Acc, Name)
        when ?IS_DIGIT(A) ->
    te_list_sep(Rest, Trail, [{Name, (A - $0) * 100}|Acc]);
te_weight(<< "0.", Rest/bits >>, Trail, Acc, Name) ->
    te_list_sep(Rest, Trail, [{Name, 0}|Acc]);
te_weight(<< "0", Rest/bits >>, Trail, Acc, Name) ->
    te_list_sep(Rest, Trail, [{Name, 0}|Acc]).

%% After a complete element: only whitespace, a comma or the end of
%% input are accepted.
te_list_sep(<<>>, Trail, Acc) ->
    {Trail, lists:reverse(Acc)};
te_list_sep(<< C, Rest/bits >>, Trail, Acc) when ?IS_WS(C) ->
    te_list_sep(Rest, Trail, Acc);
te_list_sep(<< $,, Rest/bits >>, Trail, Acc) ->
    te_list(Rest, Trail, Acc).

-ifdef(TEST).
%% Generator: {Trail, Codings, SerializedHeaderValue}. When Trail is
%% trailers, a "trailers" element is inserted at a random position.
te() ->
    ?LET({Trail, L},
        {elements([trailers, no_trailers]),
            small_non_empty_list({?SUCHTHAT(T, token(), T =/= <<"trailers">>), weight()})},
        {Trail, L, begin
            L2 = case Trail of
                no_trailers -> L;
                trailers ->
                    Rand = random:uniform(length(L) + 1) - 1,
                    {Before, After} = lists:split(Rand, L),
                    Before ++ [{<<"trailers">>, undefined}|After]
            end,
            %% Every element is emitted with a leading comma; the
            %% first comma is dropped by the binary match.
            << _, TE/binary >> = iolist_to_binary([case W of
                undefined -> [$,, T];
                _ -> [$,, T, <<";q=">>, qvalue_to_iodata(W)]
            end || {T, W} <- L2]),
            TE
        end}
    ).

%% Property: the trailers flag round-trips and every coding comes back
%% lowercased with its weight (1000 when none was generated).
prop_parse_te() ->
    random:seed(os:timestamp()),
    ?FORALL({Trail, L, TE},
        te(),
        begin
            {ResTrail, ResL} = parse_te(TE),
            CheckedL = [begin
                ResT =:= ?LOWER(T)
                    andalso (ResW =:= W orelse (W =:= undefined andalso ResW =:= 1000))
            end || {{T, W}, {ResT, ResW}} <- lists:zip(L, ResL)],
            ResTrail =:= Trail andalso [true] =:= lists:usort(CheckedL)
        end).

parse_te_test_() ->
    Cases = [
        {<<"deflate">>, {no_trailers, [{<<"deflate">>, 1000}]}},
        {<<>>, {no_trailers, []}},
        {<<"trailers, deflate;q=0.5">>, {trailers, [{<<"deflate">>, 500}]}}
    ],
    [{Input, fun() -> Expected = parse_te(Input) end}
        || {Input, Expected} <- Cases].

horse_parse_te() ->
    horse:repeat(200000,
        parse_te(<<"trailers, deflate;q=0.5">>)
    ).
-endif.

%% @doc Parse the Trailer header.
%%
%% Delegates to token_ci_list/2 and passes the result through
%% nonempty/1.

-spec parse_trailer(binary()) -> [binary()].
parse_trailer(Trailer) ->
    nonempty(token_ci_list(Trailer, [])).

-ifdef(TEST).
parse_trailer_test_() ->
    Cases = [
        {<<"Date, Content-MD5">>, [<<"date">>, <<"content-md5">>]}
    ],
    [{Input, fun() -> Expected = parse_trailer(Input) end}
        || {Input, Expected} <- Cases].

parse_trailer_error_test_() ->
    Cases = [
        <<>>
    ],
    [{Input, fun() -> {'EXIT', _} = (catch parse_trailer(Input)) end}
        || Input <- Cases].

horse_parse_trailer() ->
    horse:repeat(200000,
        parse_trailer(<<"Date, Content-MD5">>)
    ).
-endif.

%% @doc Parse the Transfer-Encoding header.
%%
%% This function does not support parsing of transfer-parameter.

-spec parse_transfer_encoding(binary()) -> [binary()].
%% Shortcut for the exact value "chunked"; everything else goes
%% through the generic case insensitive token list parser.
parse_transfer_encoding(<<"chunked">>) ->
	[<<"chunked">>];
parse_transfer_encoding(TransferEncoding) ->
	Codings = token_ci_list(TransferEncoding, []),
	nonempty(Codings).

-ifdef(TEST).
prop_parse_transfer_encoding() ->
	?FORALL(Tokens,
		non_empty(list(token())),
		begin
			%% Join with commas, then drop the leading comma.
			<< _, TransferEncoding/binary >> = iolist_to_binary([[$,, Tok] || Tok <- Tokens]),
			Parsed = parse_transfer_encoding(TransferEncoding),
			Checks = [?LOWER(Co) =:= ResC || {Co, ResC} <- lists:zip(Tokens, Parsed)],
			[true] =:= lists:usort(Checks)
		end).

parse_transfer_encoding_test_() ->
	Cases = [
		{<<"a , , , ">>, [<<"a">>]},
		{<<" , , , a">>, [<<"a">>]},
		{<<"a , , b">>, [<<"a">>, <<"b">>]},
		{<<"chunked">>, [<<"chunked">>]},
		{<<"chunked, something">>, [<<"chunked">>, <<"something">>]},
		{<<"gzip, chunked">>, [<<"gzip">>, <<"chunked">>]}
	],
	[{Input, fun() -> Expected = parse_transfer_encoding(Input) end} || {Input, Expected} <- Cases].

parse_transfer_encoding_error_test_() ->
	Invalid = [
		<<>>,
		<<" ">>,
		<<" , ">>,
		<<",,,">>,
		<<"a b">>
	],
	[{Input, fun() -> {'EXIT', _} = (catch parse_transfer_encoding(Input)) end}
		|| Input <- Invalid].

horse_parse_transfer_encoding_chunked() ->
	horse:repeat(200000,
		parse_transfer_encoding(<<"chunked">>)
	).

horse_parse_transfer_encoding_custom() ->
	horse:repeat(200000,
		parse_transfer_encoding(<<"chunked, something">>)
	).
-endif.

%% @doc Parse the Upgrade header.
%%
%% It is unclear from the RFC whether the values here are
%% case sensitive.
%%
%% We handle them in a case insensitive manner because they
%% are described as case insensitive in the Websocket RFC.

-spec parse_upgrade(binary()) -> [binary()].
parse_upgrade(Upgrade) ->
	Protocols = protocol_list(Upgrade, []),
	nonempty(Protocols).

%% Start of a list element: skip separators, then begin a protocol
%% name. The variable C must keep its name, ?LOWER reads it.
protocol_list(<<>>, Acc) ->
	lists:reverse(Acc);
protocol_list(<< C, Rest/bits >>, Acc) when ?IS_WS_COMMA(C) ->
	protocol_list(Rest, Acc);
protocol_list(<< C, Rest/bits >>, Acc) when ?IS_TOKEN(C) ->
	?LOWER(protocol_name, Rest, Acc, <<>>).

%% Accumulate the protocol name. A "/" followed by one more character
%% switches to the version part. The variable C must keep its name
%% in every clause that expands ?LOWER.
protocol_name(<< $/, C, Rest/bits >>, Acc, Proto) ->
	?LOWER(protocol_version, Rest, Acc, << Proto/binary, $/ >>);
protocol_name(<< C, Rest/bits >>, Acc, Proto) when ?IS_TOKEN(C) ->
	?LOWER(protocol_name, Rest, Acc, Proto);
protocol_name(Rest, Acc, Proto) ->
	protocol_list_sep(Rest, [Proto|Acc]).

%% Accumulate the version part that follows the "/".
protocol_version(<< C, Rest/bits >>, Acc, Proto) when ?IS_TOKEN(C) ->
	?LOWER(protocol_version, Rest, Acc, Proto);
protocol_version(Rest, Acc, Proto) ->
	protocol_list_sep(Rest, [Proto|Acc]).

%% After an element: end of input, or a comma before the next one.
protocol_list_sep(<<>>, Acc) ->
	lists:reverse(Acc);
protocol_list_sep(<< C, Rest/bits >>, Acc) when ?IS_WS(C) ->
	protocol_list_sep(Rest, Acc);
protocol_list_sep(<< $,, Rest/bits >>, Acc) ->
	protocol_list(Rest, Acc).

-ifdef(TEST).
%% Generator: either a bare token or a "name/version" pair.
protocols() ->
	?LET(Parts,
		oneof([token(), [token(), $/, token()]]),
		iolist_to_binary(Parts)).

prop_parse_upgrade() ->
	?FORALL(Protos,
		non_empty(list(protocols())),
		begin
			%% Join with commas, then drop the leading comma.
			<< _, Upgrade/binary >> = iolist_to_binary([[$,, Proto] || Proto <- Protos]),
			Parsed = parse_upgrade(Upgrade),
			Checks = [?LOWER(P) =:= ResP || {P, ResP} <- lists:zip(Protos, Parsed)],
			[true] =:= lists:usort(Checks)
		end).

parse_upgrade_test_() ->
	Cases = [
		{<<"HTTP/2.0, SHTTP/1.3, IRC/6.9, RTA/x11">>,
			[<<"http/2.0">>, <<"shttp/1.3">>, <<"irc/6.9">>, <<"rta/x11">>]},
		{<<"HTTP/2.0">>, [<<"http/2.0">>]}
	],
	[{Input, fun() -> Expected = parse_upgrade(Input) end} || {Input, Expected} <- Cases].

parse_upgrade_error_test_() ->
	Invalid = [
		<<>>
	],
	[{Input, fun() -> {'EXIT', _} = (catch parse_upgrade(Input)) end}
		|| Input <- Invalid].
-endif.

%% @doc Parse the Vary header.

-spec parse_vary(binary()) -> '*' | [binary()].
%% The exact value "*" is returned as the atom '*'; anything else
%% is parsed as a non-empty list of case insensitive tokens.
parse_vary(<<"*">>) ->
	'*';
parse_vary(Vary) ->
	Fields = token_ci_list(Vary, []),
	nonempty(Fields).

-ifdef(TEST).
parse_vary_test_() ->
	Cases = [
		{<<"*">>, '*'},
		{<<"Accept-Encoding">>, [<<"accept-encoding">>]},
		{<<"accept-encoding, accept-language">>, [<<"accept-encoding">>, <<"accept-language">>]}
	],
	[{Input, fun() -> Expected = parse_vary(Input) end} || {Input, Expected} <- Cases].

parse_vary_error_test_() ->
	Invalid = [
		<<>>
	],
	[{Input, fun() -> {'EXIT', _} = (catch parse_vary(Input)) end} || Input <- Invalid].
-endif.

%% @doc Parse the WWW-Authenticate header.
%%
%% Unknown schemes are represented as the lowercase binary
%% instead of an atom. Unlike with parse_authorization/1,
%% we do not crash on unknown schemes.
%%
%% When parsing auth-params, we do not accept BWS characters around the "=".

-spec parse_www_authenticate(binary()) -> [{basic, binary()}
	| {bearer | digest | binary(), [{binary(), binary()}]}].
parse_www_authenticate(Authenticate) ->
	Challenges = www_auth_list(Authenticate, []),
	nonempty(Challenges).

%% Start of a challenge: skip separators, then read the scheme name.
%% The variable C must keep its name, ?LOWER reads it.
www_auth_list(<<>>, Acc) ->
	lists:reverse(Acc);
www_auth_list(<< C, Rest/bits >>, Acc) when ?IS_WS_COMMA(C) ->
	www_auth_list(Rest, Acc);
www_auth_list(<< C, Rest/bits >>, Acc) when ?IS_TOKEN(C) ->
	?LOWER(www_auth_scheme, Rest, Acc, <<>>).

%% Basic only takes a quoted realm; skip whitespace up to it.
www_auth_basic_before_realm(<< C, Rest/bits >>, Acc) when ?IS_WS(C) ->
	www_auth_basic_before_realm(Rest, Acc);
www_auth_basic_before_realm(<< "realm=\"", Rest/bits >>, Acc) ->
	www_auth_basic(Rest, Acc, <<>>).

%% Inside the quoted realm: a closing quote ends it, a backslash
%% escapes the next character, anything else is taken literally.
www_auth_basic(<< $", Rest/bits >>, Acc, Realm) ->
	www_auth_list_sep(Rest, [{basic, Realm}|Acc]);
www_auth_basic(<< $\\, C, Rest/bits >>, Acc, Realm) when ?IS_VCHAR_OBS(C) ->
	www_auth_basic(Rest, Acc, << Realm/binary, C >>);
www_auth_basic(<< C, Rest/bits >>, Acc, Realm) when ?IS_VCHAR_OBS(C) ->
	www_auth_basic(Rest, Acc, << Realm/binary, C >>).

%% Accumulate the lowercased scheme name. Whitespace ends the name
%% and selects how the remainder of the challenge is parsed.
www_auth_scheme(<< C, Rest/bits >>, Acc, Scheme) when ?IS_WS(C) ->
	case Scheme of
		<<"basic">> ->
			www_auth_basic_before_realm(Rest, Acc);
		_ ->
			%% Known schemes become atoms, unknown ones stay binaries.
			SchemeId = case Scheme of
				<<"bearer">> -> bearer;
				<<"digest">> -> digest;
				_ -> Scheme
			end,
			www_auth_params_list(Rest, Acc, SchemeId, [])
	end;
www_auth_scheme(<< C, Rest/bits >>, Acc, Scheme) when ?IS_TOKEN(C) ->
	?LOWER(www_auth_scheme, Rest, Acc, Scheme).

%% After a Basic challenge: end of input, or a comma before the next one.
www_auth_list_sep(<<>>, Acc) ->
	lists:reverse(Acc);
www_auth_list_sep(<< C, Rest/bits >>, Acc) when ?IS_WS(C) ->
	www_auth_list_sep(Rest, Acc);
www_auth_list_sep(<< $,, Rest/bits >>, Acc) ->
	www_auth_list(Rest, Acc).

%% Start of an auth-param. Well-known keys are matched literally;
%% any other key falls through to the generic clause at the bottom.
%% Reaching the end of input here requires at least one parameter.
www_auth_params_list(<<>>, Acc, Scheme, Params) ->
	lists:reverse([{Scheme, lists:reverse(nonempty(Params))}|Acc]);
www_auth_params_list(<< C, Rest/bits >>, Acc, Scheme, Params) when ?IS_WS_COMMA(C) ->
	www_auth_params_list(Rest, Acc, Scheme, Params);
www_auth_params_list(<< "algorithm=", C, Rest/bits >>, Acc, Scheme, Params) when ?IS_TOKEN(C) ->
	www_auth_token(Rest, Acc, Scheme, Params, <<"algorithm">>, << C >>);
www_auth_params_list(<< "domain=\"", Rest/bits >>, Acc, Scheme, Params) ->
	www_auth_quoted(Rest, Acc, Scheme, Params, <<"domain">>, <<>>);
www_auth_params_list(<< "error=\"", Rest/bits >>, Acc, Scheme, Params) ->
	www_auth_quoted(Rest, Acc, Scheme, Params, <<"error">>, <<>>);
www_auth_params_list(<< "error_description=\"", Rest/bits >>, Acc, Scheme, Params) ->
	www_auth_quoted(Rest, Acc, Scheme, Params, <<"error_description">>, <<>>);
www_auth_params_list(<< "error_uri=\"", Rest/bits >>, Acc, Scheme, Params) ->
	www_auth_quoted(Rest, Acc, Scheme, Params, <<"error_uri">>, <<>>);
www_auth_params_list(<< "nonce=\"", Rest/bits >>, Acc, Scheme, Params) ->
	www_auth_quoted(Rest, Acc, Scheme, Params, <<"nonce">>, <<>>);
www_auth_params_list(<< "opaque=\"", Rest/bits >>, Acc, Scheme, Params) ->
	www_auth_quoted(Rest, Acc, Scheme, Params, <<"opaque">>, <<>>);
www_auth_params_list(<< "qop=\"", Rest/bits >>, Acc, Scheme, Params) ->
	www_auth_quoted(Rest, Acc, Scheme, Params, <<"qop">>, <<>>);
www_auth_params_list(<< "realm=\"", Rest/bits >>, Acc, Scheme, Params) ->
	www_auth_quoted(Rest, Acc, Scheme, Params, <<"realm">>, <<>>);
www_auth_params_list(<< "scope=\"", Rest/bits >>, Acc, Scheme, Params) ->
	www_auth_quoted(Rest, Acc, Scheme, Params, <<"scope">>, <<>>);
www_auth_params_list(<< "stale=false", Rest/bits >>, Acc, Scheme, Params) ->
	www_auth_params_list_sep(Rest, Acc, Scheme, [{<<"stale">>, <<"false">>}|Params]);
www_auth_params_list(<< "stale=true", Rest/bits >>, Acc, Scheme, Params) ->
	www_auth_params_list_sep(Rest, Acc, Scheme, [{<<"stale">>, <<"true">>}|Params]);
www_auth_params_list(<< C, Rest/bits >>, Acc, Scheme, Params) when ?IS_TOKEN(C) ->
	?LOWER(www_auth_param, Rest, Acc, Scheme, Params, <<>>).

%% Generic key. "=" followed by a quote or a token character starts
%% the value. If the token is not followed by "=", it was in fact the
%% scheme of the next challenge: the current challenge is closed and
%% parsing resumes in www_auth_scheme/3 with that name.
www_auth_param(<< $=, $", Rest/bits >>, Acc, Scheme, Params, Key) ->
	www_auth_quoted(Rest, Acc, Scheme, Params, Key, <<>>);
www_auth_param(<< $=, C, Rest/bits >>, Acc, Scheme, Params, Key) when ?IS_TOKEN(C) ->
	www_auth_token(Rest, Acc, Scheme, Params, Key, << C >>);
www_auth_param(<< C, Rest/bits >>, Acc, Scheme, Params, Key) when ?IS_TOKEN(C) ->
	?LOWER(www_auth_param, Rest, Acc, Scheme, Params, Key);
www_auth_param(Rest, Acc, Scheme, Params, NewScheme) ->
	www_auth_scheme(Rest, [{Scheme, lists:reverse(Params)}|Acc], NewScheme).

%% Unquoted value: token characters are kept as they are.
www_auth_token(<< C, Rest/bits >>, Acc, Scheme, Params, Key, Value) when ?IS_TOKEN(C) ->
	www_auth_token(Rest, Acc, Scheme, Params, Key, << Value/binary, C >>);
www_auth_token(Rest, Acc, Scheme, Params, Key, Value) ->
	www_auth_params_list_sep(Rest, Acc, Scheme, [{Key, Value}|Params]).

%% Quoted value: a closing quote ends it, a backslash escapes the
%% next character, anything else is taken literally.
www_auth_quoted(<< $", Rest/bits >>, Acc, Scheme, Params, Key, Value) ->
	www_auth_params_list_sep(Rest, Acc, Scheme, [{Key, Value}|Params]);
www_auth_quoted(<< $\\, C, Rest/bits >>, Acc, Scheme, Params, Key, Value) when ?IS_VCHAR_OBS(C) ->
	www_auth_quoted(Rest, Acc, Scheme, Params, Key, << Value/binary, C >>);
www_auth_quoted(<< C, Rest/bits >>, Acc, Scheme, Params, Key, Value) when ?IS_VCHAR_OBS(C) ->
	www_auth_quoted(Rest, Acc, Scheme, Params, Key, << Value/binary, C >>).

%% After a parameter: end of input closes the challenge, a comma
%% leads to the next parameter or the next challenge.
www_auth_params_list_sep(<<>>, Acc, Scheme, Params) ->
	lists:reverse([{Scheme, lists:reverse(Params)}|Acc]);
www_auth_params_list_sep(<< C, Rest/bits >>, Acc, Scheme, Params) when ?IS_WS(C) ->
	www_auth_params_list_sep(Rest, Acc, Scheme, Params);
www_auth_params_list_sep(<< $,, Rest/bits >>, Acc, Scheme, Params) ->
	www_auth_params_list_after_sep(Rest, Acc, Scheme, Params).

%% After the comma that follows a parameter. Unlike
%% www_auth_params_list/4, reaching the end of input here does not
%% go through nonempty/1.
www_auth_params_list_after_sep(<<>>, Acc, Scheme, Params) ->
	lists:reverse([{Scheme, lists:reverse(Params)}|Acc]);
www_auth_params_list_after_sep(<< C, Rest/bits >>, Acc, Scheme, Params) when ?IS_WS_COMMA(C) ->
	www_auth_params_list_after_sep(Rest, Acc, Scheme, Params);
www_auth_params_list_after_sep(Rest, Acc, Scheme, Params) ->
	www_auth_params_list(Rest, Acc, Scheme, Params).

-ifdef(TEST).
parse_www_authenticate_test_() ->
	Cases = [
		{<<"Newauth realm=\"apps\", type=1, title=\"Login to \\\"apps\\\"\", Basic realm=\"simple\"">>,
			[{<<"newauth">>, [
				{<<"realm">>, <<"apps">>},
				{<<"type">>, <<"1">>},
				{<<"title">>, <<"Login to \"apps\"">>}]},
			{basic, <<"simple">>}]},
		%% Same test, different order.
		{<<"Basic realm=\"simple\", Newauth realm=\"apps\", type=1, title=\"Login to \\\"apps\\\"\"">>,
			[{basic, <<"simple">>},
			{<<"newauth">>, [
				{<<"realm">>, <<"apps">>},
				{<<"type">>, <<"1">>},
				{<<"title">>, <<"Login to \"apps\"">>}]}]},
		{<<"Bearer realm=\"example\"">>,
			[{bearer, [{<<"realm">>, <<"example">>}]}]},
		{<<"Bearer realm=\"example\", error=\"invalid_token\", error_description=\"The access token expired\"">>,
			[{bearer, [
				{<<"realm">>, <<"example">>},
				{<<"error">>, <<"invalid_token">>},
				{<<"error_description">>, <<"The access token expired">>}
			]}]},
		{<<"Basic realm=\"WallyWorld\"">>,
			[{basic, <<"WallyWorld">>}]},
		{<<"Digest realm=\"[email protected]\", qop=\"auth,auth-int\", "
				"nonce=\"dcd98b7102dd2f0e8b11d0f600bfb0c093\", "
				"opaque=\"5ccc069c403ebaf9f0171e9517f40e41\"">>,
			[{digest, [
				{<<"realm">>, <<"[email protected]">>},
				{<<"qop">>, <<"auth,auth-int">>},
				{<<"nonce">>, <<"dcd98b7102dd2f0e8b11d0f600bfb0c093">>},
				{<<"opaque">>, <<"5ccc069c403ebaf9f0171e9517f40e41">>}
			]}]}
	],
	[{Input, fun() -> Expected = parse_www_authenticate(Input) end} || {Input, Expected} <- Cases].

parse_www_authenticate_error_test_() ->
	Invalid = [
		<<>>
	],
	[{Input, fun() -> {'EXIT', _} = (catch parse_www_authenticate(Input)) end} || Input <- Invalid].

horse_parse_www_authenticate() ->
	horse:repeat(200000,
		parse_www_authenticate(<<"Newauth realm=\"apps\", type=1, title=\"Login to \\\"apps\\\"\", Basic realm=\"simple\"">>)
	).
-endif.

%% @doc Parse the X-Forwarded-For header.
%%
%% This header has no specification but *looks like* it is
%% a list of tokens.
%%
%% This header is deprecated in favor of the Forwarded header.

-spec parse_x_forwarded_for(binary()) -> [binary()].
parse_x_forwarded_for(XForwardedFor) ->
	%% Case is preserved here: token_list/2, not token_ci_list/2.
	Hops = token_list(XForwardedFor, []),
	nonempty(Hops).

-ifdef(TEST).
parse_x_forwarded_for_test_() ->
	Cases = [
		{<<"client, proxy1, proxy2">>, [<<"client">>, <<"proxy1">>, <<"proxy2">>]},
		{<<"128.138.243.150, unknown, 192.52.106.30">>, [<<"128.138.243.150">>, <<"unknown">>, <<"192.52.106.30">>]}
	],
	[{Input, fun() -> Expected = parse_x_forwarded_for(Input) end} || {Input, Expected} <- Cases].

parse_x_forwarded_for_error_test_() ->
	Invalid = [
		<<>>
	],
	[{Input, fun() -> {'EXIT', _} = (catch parse_x_forwarded_for(Input)) end} || Input <- Invalid].
-endif.

%% Internal.

%% Only return if the list is not empty.
%% An empty list has no matching clause, so the caller crashes.
nonempty(List) when List =/= [] ->
	List.

%% Parse a list of case sensitive tokens.
token_list(<<>>, Acc) ->
	lists:reverse(Acc);
token_list(<< C, Rest/bits >>, Acc) when ?IS_WS_COMMA(C) ->
	token_list(Rest, Acc);
token_list(<< C, Rest/bits >>, Acc) when ?IS_TOKEN(C) ->
	token(Rest, Acc, << C >>).

%% Accumulate one token; the first non-token character ends it.
token(<< C, Rest/bits >>, Acc, Token) when ?IS_TOKEN(C) ->
	token(Rest, Acc, << Token/binary, C >>);
token(Rest, Acc, Token) ->
	token_list_sep(Rest, [Token|Acc]).

%% After a token: end of input, or a comma before the next token.
token_list_sep(<<>>, Acc) ->
	lists:reverse(Acc);
token_list_sep(<< C, Rest/bits >>, Acc) when ?IS_WS(C) ->
	token_list_sep(Rest, Acc);
token_list_sep(<< $,, Rest/bits >>, Acc) ->
	token_list(Rest, Acc).

%% Parse a list of case insensitive tokens.
%% The variable C must keep its name, ?LOWER reads it.
token_ci_list(<<>>, Acc) ->
	lists:reverse(Acc);
token_ci_list(<< C, Rest/bits >>, Acc) when ?IS_WS_COMMA(C) ->
	token_ci_list(Rest, Acc);
token_ci_list(<< C, Rest/bits >>, Acc) when ?IS_TOKEN(C) ->
	?LOWER(token_ci, Rest, Acc, <<>>).

%% Accumulate one lowercased token; the first non-token character
%% ends it. The variable C must keep its name, ?LOWER reads it.
token_ci(<< C, Rest/bits >>, Acc, Token) when ?IS_TOKEN(C) ->
	?LOWER(token_ci, Rest, Acc, Token);
token_ci(Rest, Acc, Token) ->
	token_ci_list_sep(Rest, [Token|Acc]).

%% After a token: end of input, or a comma before the next token.
token_ci_list_sep(<<>>, Acc) ->
	lists:reverse(Acc);
token_ci_list_sep(<< C, Rest/bits >>, Acc) when ?IS_WS(C) ->
	token_ci_list_sep(Rest, Acc);
token_ci_list_sep(<< $,, Rest/bits >>, Acc) ->
	token_ci_list(Rest, Acc).
diff --git a/src/cow_http_te.erl b/src/cow_http_te.erl
new file mode 100644
index 0000000..1e7b43f
--- /dev/null
+++ b/src/cow_http_te.erl
@@ -0,0 +1,322 @@
%% Copyright (c) 2014, Loïc Hoguin <[email protected]>
%%
%% Permission to use, copy, modify, and/or distribute this software for any
%% purpose with or without fee is hereby granted, provided that the above
%% copyright notice and this permission notice appear in all copies.
%%
%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

-module(cow_http_te).

%% Identity.
-export([stream_identity/2]).
-export([identity/1]).

%% Chunked.
-export([stream_chunked/2]).
-export([chunk/1]).
-export([last_chunk/0]).

%% The state type is the same for both identity and chunked.
-type state() :: {non_neg_integer(), non_neg_integer()}.

-type decode_ret() :: more
	| {more, Data::binary(), state()}
	| {more, Data::binary(), RemLen::non_neg_integer(), state()}
	| {more, Data::binary(), Rest::binary(), state()}
	| {done, TotalLen::non_neg_integer(), Rest::binary()}
	| {done, Data::binary(), TotalLen::non_neg_integer(), Rest::binary()}.
-export_type([decode_ret/0]).

-ifdef(TEST).
%% Test helper: hand the input to Decode one byte at a time, keeping
%% whatever the decoder says must be buffered, until it reports done
%% with nothing left over. Any other outcome has no matching clause.
dripfeed(<< Byte, Remaining/bits >>, Buffer, State, Decode) ->
	case Decode(<< Buffer/binary, Byte >>, State) of
		more ->
			%% Nothing was consumed: keep buffering.
			dripfeed(Remaining, << Buffer/binary, Byte >>, State, Decode);
		{more, _, NextState} ->
			dripfeed(Remaining, <<>>, NextState, Decode);
		{more, _, Len, NextState} when is_integer(Len) ->
			dripfeed(Remaining, <<>>, NextState, Decode);
		{more, _, Leftover, NextState} ->
			dripfeed(Remaining, Leftover, NextState, Decode);
		{done, _, <<>>} ->
			ok;
		{done, _, _, <<>>} ->
			ok
	end.
-endif.

%% Identity.

%% @doc Decode an identity stream.
%%
%% The state is {Streamed, Total}: bytes received so far and the
%% expected total. While the total is not reached the data is passed
%% through along with the number of bytes still missing. Once it is
%% reached, the data is split into the end of the body and the rest.

-spec stream_identity(Data, State)
	-> {more, Data, Len, State} | {done, Data, Len, Data}
	when Data::binary(), State::state(), Len::non_neg_integer().
stream_identity(Data, {Streamed, Total}) ->
	NewStreamed = Streamed + byte_size(Data),
	case NewStreamed < Total of
		true ->
			{more, Data, Total - NewStreamed, {NewStreamed, Total}};
		false ->
			Wanted = Total - Streamed,
			<< Body:Wanted/binary, Rest/bits >> = Data,
			{done, Body, Total, Rest}
	end.

%% Identity encoding leaves the data untouched.
-spec identity(Data) -> Data when Data::iodata().
identity(Data) ->
	Data.

-ifdef(TEST).
stream_identity_test() ->
	{done, <<>>, 0, <<>>}
		= stream_identity(identity(<<>>), {0, 0}),
	{done, <<"\r\n">>, 2, <<>>}
		= stream_identity(identity(<<"\r\n">>), {0, 2}),
	{done, << 0:80000 >>, 10000, <<>>}
		= stream_identity(identity(<< 0:80000 >>), {0, 10000}),
	ok.

stream_identity_parts_test() ->
	{more, << 0:8000 >>, 1999, State1}
		= stream_identity(<< 0:8000 >>, {0, 2999}),
	{more, << 0:8000 >>, 999, State2}
		= stream_identity(<< 0:8000 >>, State1),
	{done, << 0:7992 >>, 2999, <<>>}
		= stream_identity(<< 0:7992 >>, State2),
	ok.

%% Using the same data as the chunked one for comparison.
horse_stream_identity() ->
	horse:repeat(10000,
		stream_identity(<<
			"4\r\n"
			"Wiki\r\n"
			"5\r\n"
			"pedia\r\n"
			"e\r\n"
			" in\r\n\r\nchunks.\r\n"
			"0\r\n"
			"\r\n">>, {0, 43})
	).

horse_stream_identity_dripfeed() ->
	horse:repeat(10000,
		dripfeed(<<
			"4\r\n"
			"Wiki\r\n"
			"5\r\n"
			"pedia\r\n"
			"e\r\n"
			" in\r\n\r\nchunks.\r\n"
			"0\r\n"
			"\r\n">>, <<>>, {0, 43}, fun stream_identity/2)
	).
-endif.

%% Chunked.

%% @doc Decode a chunked stream.
%%
%% The state is {Rem, Streamed}. Rem is what remains of the current
%% chunk, including its trailing "\r\n" (0 means a chunk size line is
%% expected next). Streamed is the number of body bytes decoded so far.

-spec stream_chunked(Data, State)
	-> more | {more, Data, State} | {more, Data, Len, State}
	| {more, Data, Data, State}
	| {done, Len, Data} | {done, Data, Len, Data}
	when Data::binary(), State::state(), Len::non_neg_integer().
stream_chunked(Data, State) ->
	stream_chunked(Data, State, <<>>).

%% New chunk.
stream_chunked(Data = << C, _/bits >>, {0, Streamed}, Acc) when C =/= $\r ->
	case chunked_len(Data, Streamed, Acc, 0) of
		{next, AfterSize, ChunkState, NewAcc} ->
			stream_chunked(AfterSize, ChunkState, NewAcc);
		{more, PendingState, NewAcc} ->
			%% The size line is incomplete: hand back what was decoded
			%% along with the undecoded data starting at that line.
			{more, NewAcc, Data, PendingState};
		Other ->
			Other
	end;
%% Trailing \r\n before next chunk.
stream_chunked(<< "\r\n", Tail/bits >>, {2, Streamed}, Acc) ->
	stream_chunked(Tail, {0, Streamed}, Acc);
%% Trailing \r before next chunk.
stream_chunked(<< "\r" >>, {2, Streamed}, Acc) ->
	{more, Acc, {1, Streamed}};
%% Trailing \n before next chunk.
stream_chunked(<< "\n", Tail/bits >>, {1, Streamed}, Acc) ->
	stream_chunked(Tail, {0, Streamed}, Acc);
%% More data needed.
stream_chunked(<<>>, State = {Rem, _}, Acc) ->
	{more, Acc, Rem, State};
%% Chunk data.
stream_chunked(Data, {Rem, Streamed}, Acc) when Rem > 2 ->
	Received = byte_size(Data),
	%% Rem counts the trailing \r\n, the payload is two bytes shorter.
	Payload = Rem - 2,
	case Data of
		<< Chunk:Payload/binary, "\r\n", Tail/bits >> ->
			stream_chunked(Tail, {0, Streamed + Payload}, << Acc/binary, Chunk/binary >>);
		<< Chunk:Payload/binary, "\r" >> ->
			{more, << Acc/binary, Chunk/binary >>, {1, Streamed + Payload}};
		%% Everything in Data is part of the chunk.
		_ ->
			Left = Rem - Received,
			{more, << Acc/binary, Data/binary >>, Left, {Left, Streamed + Received}}
	end.

%% Decode the hexadecimal chunk size, one digit per clause, then
%% decide what follows the size line. Arguments: remaining input,
%% bytes streamed so far, body accumulated so far, size read so far.
chunked_len(<< $0, Rest/bits >>, Streamed, Acc, Len) -> chunked_len(Rest, Streamed, Acc, Len * 16);
chunked_len(<< $1, Rest/bits >>, Streamed, Acc, Len) -> chunked_len(Rest, Streamed, Acc, Len * 16 + 1);
chunked_len(<< $2, Rest/bits >>, Streamed, Acc, Len) -> chunked_len(Rest, Streamed, Acc, Len * 16 + 2);
chunked_len(<< $3, Rest/bits >>, Streamed, Acc, Len) -> chunked_len(Rest, Streamed, Acc, Len * 16 + 3);
chunked_len(<< $4, Rest/bits >>, Streamed, Acc, Len) -> chunked_len(Rest, Streamed, Acc, Len * 16 + 4);
chunked_len(<< $5, Rest/bits >>, Streamed, Acc, Len) -> chunked_len(Rest, Streamed, Acc, Len * 16 + 5);
chunked_len(<< $6, Rest/bits >>, Streamed, Acc, Len) -> chunked_len(Rest, Streamed, Acc, Len * 16 + 6);
chunked_len(<< $7, Rest/bits >>, Streamed, Acc, Len) -> chunked_len(Rest, Streamed, Acc, Len * 16 + 7);
chunked_len(<< $8, Rest/bits >>, Streamed, Acc, Len) -> chunked_len(Rest, Streamed, Acc, Len * 16 + 8);
chunked_len(<< $9, Rest/bits >>, Streamed, Acc, Len) -> chunked_len(Rest, Streamed, Acc, Len * 16 + 9);
chunked_len(<< $A, Rest/bits >>, Streamed, Acc, Len) -> chunked_len(Rest, Streamed, Acc, Len * 16 + 10);
chunked_len(<< $B, Rest/bits >>, Streamed, Acc, Len) -> chunked_len(Rest, Streamed, Acc, Len * 16 + 11);
chunked_len(<< $C, Rest/bits >>, Streamed, Acc, Len) -> chunked_len(Rest, Streamed, Acc, Len * 16 + 12);
chunked_len(<< $D, Rest/bits >>, Streamed, Acc, Len) -> chunked_len(Rest, Streamed, Acc, Len * 16 + 13);
chunked_len(<< $E, Rest/bits >>, Streamed, Acc, Len) -> chunked_len(Rest, Streamed, Acc, Len * 16 + 14);
chunked_len(<< $F, Rest/bits >>, Streamed, Acc, Len) -> chunked_len(Rest, Streamed, Acc, Len * 16 + 15);
chunked_len(<< $a, Rest/bits >>, Streamed, Acc, Len) -> chunked_len(Rest, Streamed, Acc, Len * 16 + 10);
chunked_len(<< $b, Rest/bits >>, Streamed, Acc, Len) -> chunked_len(Rest, Streamed, Acc, Len * 16 + 11);
chunked_len(<< $c, Rest/bits >>, Streamed, Acc, Len) -> chunked_len(Rest, Streamed, Acc, Len * 16 + 12);
chunked_len(<< $d, Rest/bits >>, Streamed, Acc, Len) -> chunked_len(Rest, Streamed, Acc, Len * 16 + 13);
chunked_len(<< $e, Rest/bits >>, Streamed, Acc, Len) -> chunked_len(Rest, Streamed, Acc, Len * 16 + 14);
chunked_len(<< $f, Rest/bits >>, Streamed, Acc, Len) -> chunked_len(Rest, Streamed, Acc, Len * 16 + 15);
%% Final chunk.
chunked_len(<< "\r\n\r\n", Rest/bits >>, Streamed, <<>>, 0) ->
	{done, Streamed, Rest};
chunked_len(<< "\r\n\r\n", Rest/bits >>, Streamed, Acc, 0) ->
	{done, Acc, Streamed, Rest};
%% A size of 0 not (yet) followed by "\r\n\r\n": ask for more data.
chunked_len(_, _, _, 0) ->
	more;
%% Normal chunk. Add 2 to Len for the trailing \r\n.
chunked_len(<< "\r\n", Rest/bits >>, Streamed, Acc, Len) ->
	{next, Rest, {Len + 2, Streamed}, Acc};
%% Incomplete size line. With nothing decoded yet a bare 'more' is
%% enough; otherwise the decoded data is returned with the state.
chunked_len(<<"\r">>, _, <<>>, _) ->
	more;
chunked_len(<<"\r">>, Streamed, Acc, _) ->
	{more, {0, Streamed}, Acc};
chunked_len(<<>>, _, <<>>, _) ->
	more;
chunked_len(<<>>, Streamed, Acc, _) ->
	{more, {0, Streamed}, Acc}.

%% @doc Encode a chunk.
%%
%% The result is the size of Data in hexadecimal, "\r\n", Data itself
%% and a closing "\r\n".

-spec chunk(D) -> D when D::iodata().
chunk(Data) ->
	HexSize = integer_to_list(iolist_size(Data), 16),
	[HexSize, <<"\r\n">>,
		Data, <<"\r\n">>].

%% @doc Encode the last chunk of a chunked stream.

-spec last_chunk() -> << _:40 >>.
last_chunk() ->
	<<"0\r\n\r\n">>.

-ifdef(TEST).
stream_chunked_identity_test() ->
	Encoded = iolist_to_binary([
		chunk("Wiki"),
		chunk("pedia"),
		chunk(" in\r\n\r\nchunks."),
		last_chunk()
	]),
	{done, <<"Wikipedia in\r\n\r\nchunks.">>, 23, <<>>}
		= stream_chunked(Encoded, {0, 0}),
	ok.

stream_chunked_one_pass_test() ->
	{done, 0, <<>>} = stream_chunked(<<"0\r\n\r\n">>, {0, 0}),
	{done, <<"Wikipedia in\r\n\r\nchunks.">>, 23, <<>>}
		= stream_chunked(<<
			"4\r\n"
			"Wiki\r\n"
			"5\r\n"
			"pedia\r\n"
			"e\r\n"
			" in\r\n\r\nchunks.\r\n"
			"0\r\n"
			"\r\n">>, {0, 0}),
	ok.

stream_chunked_n_passes_test() ->
	Init = {0, 0},
	more = stream_chunked(<<"4\r">>, Init),
	{more, <<>>, 6, St1} = stream_chunked(<<"4\r\n">>, Init),
	{more, <<"Wiki">>, 0, St2} = stream_chunked(<<"Wiki\r\n">>, St1),
	{more, <<"pedia">>, <<"e\r">>, St3} = stream_chunked(<<"5\r\npedia\r\ne\r">>, St2),
	{more, <<" in\r\n\r\nchunks.">>, 2, St4} = stream_chunked(<<"e\r\n in\r\n\r\nchunks.">>, St3),
	{done, 23, <<>>} = stream_chunked(<<"\r\n0\r\n\r\n">>, St4),
	%% A few extra for coverage purposes.
	more = stream_chunked(<<"\n3">>, {1, 0}),
	{more, <<"abc">>, 2, {2, 3}} = stream_chunked(<<"\n3\r\nabc">>, {1, 0}),
	{more, <<"abc">>, {1, 3}} = stream_chunked(<<"3\r\nabc\r">>, {0, 0}),
	{more, <<"abc">>, <<"123">>, {0, 3}} = stream_chunked(<<"3\r\nabc\r\n123">>, {0, 0}),
	ok.

stream_chunked_dripfeed_test() ->
	dripfeed(<<
		"4\r\n"
		"Wiki\r\n"
		"5\r\n"
		"pedia\r\n"
		"e\r\n"
		" in\r\n\r\nchunks.\r\n"
		"0\r\n"
		"\r\n">>, <<>>, {0, 0}, fun stream_chunked/2).

%% Split Body into chunks of at most ChunkSize bytes, each one encoded
%% by hand, and finish with the terminating zero-size chunk.
do_body_to_chunks(_, <<>>, Acc) ->
	lists:reverse([<<"0\r\n\r\n">>|Acc]);
do_body_to_chunks(ChunkSize, Body, Acc) ->
	%% The final chunk may be shorter than ChunkSize.
	ThisSize = erlang:min(byte_size(Body), ChunkSize),
	<< Chunk:ThisSize/binary, Remaining/binary >> = Body,
	HexSize = list_to_binary(integer_to_list(ThisSize, 16)),
	Encoded = << HexSize/binary, "\r\n", Chunk/binary, "\r\n" >>,
	do_body_to_chunks(ChunkSize, Remaining, [Encoded|Acc]).

stream_chunked_dripfeed2_test() ->
	Body = list_to_binary(io_lib:format("~p", [lists:seq(1, 100)])),
	Chunked = iolist_to_binary(do_body_to_chunks(50, Body, [])),
	dripfeed(Chunked, <<>>, {0, 0}, fun stream_chunked/2).

stream_chunked_error_test_() ->
	Cases = [
		{<<>>, undefined},
		{<<"\n\naaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa">>, {2, 0}}
	],
	[{lists:flatten(io_lib:format("value ~p state ~p", [Value, State])),
		fun() -> {'EXIT', _} = (catch stream_chunked(Value, State)) end}
		|| {Value, State} <- Cases].

horse_stream_chunked() ->
	horse:repeat(10000,
		stream_chunked(<<
			"4\r\n"
			"Wiki\r\n"
			"5\r\n"
			"pedia\r\n"
			"e\r\n"
			" in\r\n\r\nchunks.\r\n"
			"0\r\n"
			"\r\n">>, {0, 0})
	).

horse_stream_chunked_dripfeed() ->
	horse:repeat(10000,
		dripfeed(<<
			"4\r\n"
			"Wiki\r\n"
			"5\r\n"
			"pedia\r\n"
			"e\r\n"
			" in\r\n\r\nchunks.\r\n"
			"0\r\n"
			"\r\n">>, <<>>, {0, 43}, fun stream_chunked/2)
	).
-endif.
diff --git a/src/cow_mimetypes.erl b/src/cow_mimetypes.erl index 345fbac..58585b9 100644 --- a/src/cow_mimetypes.erl +++ b/src/cow_mimetypes.erl @@ -1,4 +1,4 @@ -%% Copyright (c) 2013, Loïc Hoguin <[email protected]> +%% Copyright (c) 2013-2015, Loïc Hoguin <[email protected]> %% %% Permission to use, copy, modify, and/or distribute this software for any %% purpose with or without fee is hereby granted, provided that the above diff --git a/src/cow_mimetypes.erl.src b/src/cow_mimetypes.erl.src index a98c652..cf79b5b 100644 --- a/src/cow_mimetypes.erl.src +++ b/src/cow_mimetypes.erl.src @@ -1,4 +1,4 @@ -%% Copyright (c) 2013, Loïc Hoguin <[email protected]> +%% Copyright (c) 2013-2014, Loïc Hoguin <[email protected]> %% %% Permission to use, copy, modify, and/or distribute this software for any %% purpose with or without fee is hereby granted, provided that the above diff --git a/src/cow_multipart.erl b/src/cow_multipart.erl new file mode 100644 index 0000000..276a689 --- /dev/null +++ b/src/cow_multipart.erl @@ -0,0 +1,779 @@ +%% Copyright (c) 2014-2015, Loïc Hoguin <[email protected]> +%% +%% Permission to use, copy, modify, and/or distribute this software for any +%% purpose with or without fee is hereby granted, provided that the above +%% copyright notice and this permission notice appear in all copies. +%% +%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +-module(cow_multipart). + +%% Parsing. +-export([parse_headers/2]). +-export([parse_body/2]). + +%% Building. +-export([boundary/0]). 
+-export([first_part/2]). +-export([part/2]). +-export([close/1]). + +%% Headers. +-export([form_data/1]). +-export([parse_content_disposition/1]). +-export([parse_content_transfer_encoding/1]). +-export([parse_content_type/1]). + +-type headers() :: [{iodata(), iodata()}]. +-export_type([headers/0]). + +-include("cow_inline.hrl"). + +-define(TEST1_MIME, << + "This is a message with multiple parts in MIME format.\r\n" + "--frontier\r\n" + "Content-Type: text/plain\r\n" + "\r\n" + "This is the body of the message.\r\n" + "--frontier\r\n" + "Content-Type: application/octet-stream\r\n" + "Content-Transfer-Encoding: base64\r\n" + "\r\n" + "PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg\r\n" + "Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==\r\n" + "--frontier--" +>>). +-define(TEST1_BOUNDARY, <<"frontier">>). + +-define(TEST2_MIME, << + "--AaB03x\r\n" + "Content-Disposition: form-data; name=\"submit-name\"\r\n" + "\r\n" + "Larry\r\n" + "--AaB03x\r\n" + "Content-Disposition: form-data; name=\"files\"\r\n" + "Content-Type: multipart/mixed; boundary=BbC04y\r\n" + "\r\n" + "--BbC04y\r\n" + "Content-Disposition: file; filename=\"file1.txt\"\r\n" + "Content-Type: text/plain\r\n" + "\r\n" + "... contents of file1.txt ...\r\n" + "--BbC04y\r\n" + "Content-Disposition: file; filename=\"file2.gif\"\r\n" + "Content-Type: image/gif\r\n" + "Content-Transfer-Encoding: binary\r\n" + "\r\n" + "...contents of file2.gif...\r\n" + "--BbC04y--\r\n" + "--AaB03x--" +>>). +-define(TEST2_BOUNDARY, <<"AaB03x">>). + +-define(TEST3_MIME, << + "This is the preamble.\r\n" + "--boundary\r\n" + "Content-Type: text/plain\r\n" + "\r\n" + "This is the body of the message.\r\n" + "--boundary--" + "\r\nThis is the epilogue. Here it includes leading CRLF" +>>). +-define(TEST3_BOUNDARY, <<"boundary">>). 

-define(TEST4_MIME, <<
	"This is the preamble.\r\n"
	"--boundary\r\n"
	"Content-Type: text/plain\r\n"
	"\r\n"
	"This is the body of the message.\r\n"
	"--boundary--"
	"\r\n"
>>).
-define(TEST4_BOUNDARY, <<"boundary">>).

%% RFC 2046, Section 5.1.1
-define(TEST5_MIME, <<
	"This is the preamble. It is to be ignored, though it\r\n"
	"is a handy place for composition agents to include an\r\n"
	"explanatory note to non-MIME conformant readers.\r\n"
	"\r\n"
	"--simple boundary\r\n",
	"\r\n"
	"This is implicitly typed plain US-ASCII text.\r\n"
	"It does NOT end with a linebreak."
	"\r\n"
	"--simple boundary\r\n",
	"Content-type: text/plain; charset=us-ascii\r\n"
	"\r\n"
	"This is explicitly typed plain US-ASCII text.\r\n"
	"It DOES end with a linebreak.\r\n"
	"\r\n"
	"--simple boundary--\r\n"
	"\r\n"
	"This is the epilogue. It is also to be ignored."
>>).
-define(TEST5_BOUNDARY, <<"simple boundary">>).

%% Parsing.
%%
%% The multipart format is defined in RFC 2046.

%% @doc Parse the headers for the next multipart part.
%%
%% This function skips any preamble before the boundary.
%% The preamble may be retrieved using parse_body/2.
%%
%% This function will accept input of any size, it is
%% up to the caller to limit it if needed.
%%
%% Return values:
%%  - more: nothing was consumed, call again with the same data
%%    followed by more data.
%%  - {more, Rest}: part of the preamble was skipped, call again
%%    with Rest followed by more data.
%%  - {ok, Headers, Rest}: the headers of the part, and the data
%%    that follows them.
%%  - {done, Epilogue}: the close delimiter was found.

-spec parse_headers(binary(), binary())
	-> more | {more, binary()}
	| {ok, headers(), binary()}
	| {done, binary()}.
%% If the stream starts with the boundary we can make a few assumptions
%% and quickly figure out if we got the complete list of headers.
parse_headers(<< "--", Stream/bits >>, Boundary) ->
	BoundarySize = byte_size(Boundary),
	case Stream of
		%% Last boundary. Return the epilogue.
		<< Boundary:BoundarySize/binary, "--", Stream2/bits >> ->
			{done, Stream2};
		<< Boundary:BoundarySize/binary, Stream2/bits >> ->
			%% We have all the headers only if there is a \r\n\r\n
			%% somewhere in the data after the boundary.
			case binary:match(Stream2, <<"\r\n\r\n">>) of
				nomatch ->
					more;
				_ ->
					before_parse_headers(Stream2)
			end;
		%% If there isn't enough to represent Boundary \r\n\r\n
		%% then we definitely don't have all the headers.
		_ when byte_size(Stream) < byte_size(Boundary) + 4 ->
			more;
		%% Otherwise we have preamble data to skip.
		%% We still got rid of the first two misleading bytes.
		_ ->
			skip_preamble(Stream, Boundary)
	end;
%% Otherwise we have preamble data to skip.
parse_headers(Stream, Boundary) ->
	skip_preamble(Stream, Boundary).

%% We need to find the boundary and a \r\n\r\n after that.
%% Since the boundary isn't at the start, it must be right
%% after a \r\n too.
skip_preamble(Stream, Boundary) ->
	case binary:match(Stream, <<"\r\n--", Boundary/bits >>) of
		%% No boundary, need more data.
		nomatch ->
			%% The stream may end with a partial delimiter. The
			%% delimiter is "\r\n--" followed by the boundary, so an
			%% incomplete one is at most byte_size(Boundary) + 3 bytes
			%% long. Everything before that can be skipped safely.
			SkipSize = byte_size(Stream) - byte_size(Boundary) - 3,
			case SkipSize > 0 of
				false ->
					more;
				true ->
					<< _:SkipSize/binary, Stream2/bits >> = Stream,
					{more, Stream2}
			end;
		{Start, Length} ->
			%% AtBoundary starts at the "--" of the delimiter,
			%% Stream2 right after the boundary.
			<< _:Start/binary, "\r\n", AtBoundary/bits >> = Stream,
			Start2 = Start + Length,
			<< _:Start2/binary, Stream2/bits >> = Stream,
			case Stream2 of
				%% Last boundary. Return the epilogue.
				<< "--", Stream3/bits >> ->
					{done, Stream3};
				_ ->
					%% Only look after the boundary: a blank line
					%% inside the preamble says nothing about whether
					%% the headers are complete.
					case binary:match(Stream2, <<"\r\n\r\n">>) of
						%% We don't have the full headers. Hand back the
						%% stream starting at the delimiter so that the
						%% next call still finds the boundary.
						nomatch ->
							{more, AtBoundary};
						_ ->
							before_parse_headers(Stream2)
					end
			end
	end.

%% Called with the data that immediately follows the boundary, once a
%% \r\n\r\n is known to be present in it.
before_parse_headers(<< "\r\n\r\n", Stream/bits >>) ->
	%% This indicates that there are no headers, so we can abort immediately.
	{ok, [], Stream};
before_parse_headers(<< "\r\n", Stream/bits >>) ->
	%% There is a line break right after the boundary, skip it.
	parse_hd_name(Stream, [], <<>>).

%% Read a header name one byte at a time. The name ends at ':' or at
%% whitespace preceding the ':'. Other bytes are appended to SoFar through
%% the ?LOWER macro (defined outside this section).
parse_hd_name(<< C, Rest/bits >>, H, SoFar) ->
	case C of
		$: -> parse_hd_before_value(Rest, H, SoFar);
		$\s -> parse_hd_name_ws(Rest, H, SoFar);
		$\t -> parse_hd_name_ws(Rest, H, SoFar);
		_ -> ?LOWER(parse_hd_name, Rest, H, SoFar)
	end.

%% Skip spaces and tabs between the header name and the ':'.
%% Any other byte has no matching clause.
parse_hd_name_ws(<< C, Rest/bits >>, H, Name) ->
	case C of
		$\s -> parse_hd_name_ws(Rest, H, Name);
		$\t -> parse_hd_name_ws(Rest, H, Name);
		$: -> parse_hd_before_value(Rest, H, Name)
	end.

%% Skip spaces and tabs between the ':' and the header value.
parse_hd_before_value(<< $\s, Rest/bits >>, H, N) ->
	parse_hd_before_value(Rest, H, N);
parse_hd_before_value(<< $\t, Rest/bits >>, H, N) ->
	parse_hd_before_value(Rest, H, N);
parse_hd_before_value(Buffer, H, N) ->
	parse_hd_value(Buffer, H, N, <<>>).

%% Accumulate the header value up to the end of the line.
%% Headers are prepended to the accumulator, so the list that is
%% returned is in reverse order of appearance.
parse_hd_value(<< $\r, Rest/bits >>, Headers, Name, SoFar) ->
	case Rest of
		%% Blank line: this was the last header.
		<< "\n\r\n", Rest2/bits >> ->
			{ok, [{Name, SoFar}|Headers], Rest2};
		%% Folded line: the line break and the first whitespace byte are
		%% dropped and the value continues.
		<< $\n, C, Rest2/bits >> when C =:= $\s; C =:= $\t ->
			parse_hd_value(Rest2, Headers, Name, SoFar);
		%% End of this header, another one follows.
		<< $\n, Rest2/bits >> ->
			parse_hd_name(Rest2, [{Name, SoFar}|Headers], <<>>)
	end;
parse_hd_value(<< C, Rest/bits >>, H, N, SoFar) ->
	parse_hd_value(Rest, H, N, << SoFar/binary, C >>).

%% @doc Parse the body of the current multipart part.
%%
%% The body is everything until the next boundary.
%%
%% Return values:
%%  - done: the stream starts with the boundary, there is no body data.
%%  - {done, Body} | {done, Body, Rest}: the boundary was found, Body is
%%    the last chunk of the body and Rest starts at the delimiter.
%%  - {ok, Body} | {ok, Body, Rest}: no boundary yet. Body can be used;
%%    Rest starts at a \r that may begin a partial delimiter and must be
%%    passed again along with more data.

-spec parse_body(binary(), binary())
	-> {ok, binary()} | {ok, binary(), binary()}
	| done | {done, binary()} | {done, binary(), binary()}.
parse_body(Stream, Boundary) ->
	BoundarySize = byte_size(Boundary),
	case Stream of
		<< "--", Boundary:BoundarySize/binary, _/bits >> ->
			done;
		_ ->
			case binary:match(Stream, << "\r\n--", Boundary/bits >>) of
				%% No boundary, check for a possible partial at the end.
				%% Return more or less of the body depending on the result.
				nomatch ->
					StreamSize = byte_size(Stream),
					From = StreamSize - BoundarySize - 3,
					MatchOpts = if
						%% Binary too small to contain boundary, check it fully.
						From < 0 -> [];
						%% Optimize, only check the end of the binary.
						true -> [{scope, {From, StreamSize - From}}]
					end,
					case binary:match(Stream, <<"\r">>, MatchOpts) of
						nomatch ->
							{ok, Stream};
						{Pos, _} ->
							case Stream of
								<< Body:Pos/binary >> ->
									{ok, Body};
								<< Body:Pos/binary, Rest/bits >> ->
									{ok, Body, Rest}
							end
					end;
				%% Boundary found, this is the last chunk of the body.
				{Pos, _} ->
					case Stream of
						<< Body:Pos/binary, "\r\n" >> ->
							{done, Body};
						<< Body:Pos/binary, "\r\n", Rest/bits >> ->
							{done, Body, Rest};
						<< Body:Pos/binary, Rest/bits >> ->
							{done, Body, Rest}
					end
			end
	end.

-ifdef(TEST).
%% Walk through ?TEST1_MIME: two parts, then the closing delimiter.
parse_test() ->
	H1 = [{<<"content-type">>, <<"text/plain">>}],
	Body1 = <<"This is the body of the message.">>,
	H2 = lists:sort([{<<"content-type">>, <<"application/octet-stream">>},
		{<<"content-transfer-encoding">>, <<"base64">>}]),
	Body2 = <<"PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg\r\n"
		"Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==">>,
	{ok, H1, Rest} = parse_headers(?TEST1_MIME, ?TEST1_BOUNDARY),
	{done, Body1, Rest2} = parse_body(Rest, ?TEST1_BOUNDARY),
	done = parse_body(Rest2, ?TEST1_BOUNDARY),
	{ok, H2Unsorted, Rest3} = parse_headers(Rest2, ?TEST1_BOUNDARY),
	%% Header order is not part of the contract, compare sorted lists.
	H2 = lists:sort(H2Unsorted),
	{done, Body2, Rest4} = parse_body(Rest3, ?TEST1_BOUNDARY),
	done = parse_body(Rest4, ?TEST1_BOUNDARY),
	{done, <<>>} = parse_headers(Rest4, ?TEST1_BOUNDARY),
	ok.

%% Walk through ?TEST2_MIME: the second part is itself a multipart/mixed
%% entity that is parsed using the boundary found in its content-type.
parse_interleaved_test() ->
	H1 = [{<<"content-disposition">>, <<"form-data; name=\"submit-name\"">>}],
	Body1 = <<"Larry">>,
	H2 = lists:sort([{<<"content-disposition">>, <<"form-data; name=\"files\"">>},
		{<<"content-type">>, <<"multipart/mixed; boundary=BbC04y">>}]),
	InH1 = lists:sort([{<<"content-disposition">>, <<"file; filename=\"file1.txt\"">>},
		{<<"content-type">>, <<"text/plain">>}]),
	InBody1 = <<"... contents of file1.txt ...">>,
	InH2 = lists:sort([{<<"content-disposition">>, <<"file; filename=\"file2.gif\"">>},
		{<<"content-type">>, <<"image/gif">>},
		{<<"content-transfer-encoding">>, <<"binary">>}]),
	InBody2 = <<"...contents of file2.gif...">>,
	{ok, H1, Rest} = parse_headers(?TEST2_MIME, ?TEST2_BOUNDARY),
	{done, Body1, Rest2} = parse_body(Rest, ?TEST2_BOUNDARY),
	done = parse_body(Rest2, ?TEST2_BOUNDARY),
	{ok, H2Unsorted, Rest3} = parse_headers(Rest2, ?TEST2_BOUNDARY),
	H2 = lists:sort(H2Unsorted),
	{_, ContentType} = lists:keyfind(<<"content-type">>, 1, H2),
	{<<"multipart">>, <<"mixed">>, [{<<"boundary">>, InBoundary}]}
		= parse_content_type(ContentType),
	{ok, InH1Unsorted, InRest} = parse_headers(Rest3, InBoundary),
	InH1 = lists:sort(InH1Unsorted),
	{done, InBody1, InRest2} = parse_body(InRest, InBoundary),
	done = parse_body(InRest2, InBoundary),
	{ok, InH2Unsorted, InRest3} = parse_headers(InRest2, InBoundary),
	InH2 = lists:sort(InH2Unsorted),
	{done, InBody2, InRest4} = parse_body(InRest3, InBoundary),
	done = parse_body(InRest4, InBoundary),
	%% The epilogue of the inner entity is the rest of the outer one.
	{done, Rest4} = parse_headers(InRest4, InBoundary),
	{done, <<>>} = parse_headers(Rest4, ?TEST2_BOUNDARY),
	ok.

%% The epilogue is returned as is, including its leading line break.
parse_epilogue_test() ->
	H1 = [{<<"content-type">>, <<"text/plain">>}],
	Body1 = <<"This is the body of the message.">>,
	Epilogue = <<"\r\nThis is the epilogue. Here it includes leading CRLF">>,
	{ok, H1, Rest} = parse_headers(?TEST3_MIME, ?TEST3_BOUNDARY),
	{done, Body1, Rest2} = parse_body(Rest, ?TEST3_BOUNDARY),
	done = parse_body(Rest2, ?TEST3_BOUNDARY),
	{done, Epilogue} = parse_headers(Rest2, ?TEST3_BOUNDARY),
	ok.

%% An epilogue made of a single line break is returned unchanged.
parse_epilogue_crlf_test() ->
	H1 = [{<<"content-type">>, <<"text/plain">>}],
	Body1 = <<"This is the body of the message.">>,
	Epilogue = <<"\r\n">>,
	{ok, H1, Rest} = parse_headers(?TEST4_MIME, ?TEST4_BOUNDARY),
	{done, Body1, Rest2} = parse_body(Rest, ?TEST4_BOUNDARY),
	done = parse_body(Rest2, ?TEST4_BOUNDARY),
	{done, Epilogue} = parse_headers(Rest2, ?TEST4_BOUNDARY),
	ok.

parse_rfc2046_test() ->
	%% The following is an example included in RFC 2046, Section 5.1.1.
	Body1 = <<"This is implicitly typed plain US-ASCII text.\r\n"
		"It does NOT end with a linebreak.">>,
	Body2 = <<"This is explicitly typed plain US-ASCII text.\r\n"
		"It DOES end with a linebreak.\r\n">>,
	H2 = [{<<"content-type">>, <<"text/plain; charset=us-ascii">>}],
	Epilogue = <<"\r\n\r\nThis is the epilogue. It is also to be ignored.">>,
	%% The first part has no headers at all.
	{ok, [], Rest} = parse_headers(?TEST5_MIME, ?TEST5_BOUNDARY),
	{done, Body1, Rest2} = parse_body(Rest, ?TEST5_BOUNDARY),
	{ok, H2, Rest3} = parse_headers(Rest2, ?TEST5_BOUNDARY),
	{done, Body2, Rest4} = parse_body(Rest3, ?TEST5_BOUNDARY),
	{done, Epilogue} = parse_headers(Rest4, ?TEST5_BOUNDARY),
	ok.

%% When no delimiter is found, parse_body/2 holds back everything
%% from a \r located near the end of the data, as it may be the
%% start of a delimiter.
parse_partial_test() ->
	{ok, <<0:8000, "abcdef">>, <<"\rghij">>}
		= parse_body(<<0:8000, "abcdef\rghij">>, <<"boundary">>),
	{ok, <<"abcdef">>, <<"\rghij">>}
		= parse_body(<<"abcdef\rghij">>, <<"boundary">>),
	{ok, <<"abc">>, <<"\rdef">>}
		= parse_body(<<"abc\rdef">>, <<"boundaryboundary">>),
	{ok, <<0:8000, "abcdef">>, <<"\r\nghij">>}
		= parse_body(<<0:8000, "abcdef\r\nghij">>, <<"boundary">>),
	{ok, <<"abcdef">>, <<"\r\nghij">>}
		= parse_body(<<"abcdef\r\nghij">>, <<"boundary">>),
	{ok, <<"abc">>, <<"\r\ndef">>}
		= parse_body(<<"abc\r\ndef">>, <<"boundaryboundary">>),
	{ok, <<"boundary">>, <<"\r">>}
		= parse_body(<<"boundary\r">>, <<"boundary">>),
	{ok, <<"boundary">>, <<"\r\n">>}
		= parse_body(<<"boundary\r\n">>, <<"boundary">>),
	{ok, <<"boundary">>, <<"\r\n-">>}
		= parse_body(<<"boundary\r\n-">>, <<"boundary">>),
	{ok, <<"boundary">>, <<"\r\n--">>}
		= parse_body(<<"boundary\r\n--">>, <<"boundary">>),
	ok.

%% Parse every part of a complete multipart stream, discarding the results.
perf_parse_multipart(Stream, Boundary) ->
	case parse_headers(Stream, Boundary) of
		{ok, _, Rest} ->
			{_, _, Rest2} = parse_body(Rest, Boundary),
			perf_parse_multipart(Rest2, Boundary);
		{done, _} ->
			ok
	end.

horse_parse() ->
	horse:repeat(50000,
		perf_parse_multipart(?TEST1_MIME, ?TEST1_BOUNDARY)
	).
-endif.

%% Building.

%% @doc Generate a new random boundary.
%%
%% The boundary generated has a low probability of ever appearing
%% in the data. It is the base64 encoding of 48 random bytes and is
%% therefore 64 bytes long.

-spec boundary() -> binary().
boundary() ->
	%% crypto:rand_bytes/1 is deprecated and was removed in OTP 20;
	%% crypto:strong_rand_bytes/1 is the supported replacement.
	base64:encode(crypto:strong_rand_bytes(48)).

%% @doc Return the first part's head.
%%
%% This works exactly like the part/2 function except there is
%% no leading \r\n. It's not required to use this function,
%% just makes the output a little smaller and prettier.

-spec first_part(binary(), headers()) -> iodata().
first_part(Boundary, Headers) ->
	[<<"--">>, Boundary, <<"\r\n">>, headers_to_iolist(Headers, [])].

%% @doc Return a part's head.

-spec part(binary(), headers()) -> iodata().
part(Boundary, Headers) ->
	[<<"\r\n--">>, Boundary, <<"\r\n">>, headers_to_iolist(Headers, [])].

%% Build the header lines followed by the blank line that ends the head.
headers_to_iolist([], Acc) ->
	lists:reverse([<<"\r\n">>|Acc]);
headers_to_iolist([{N, V}|Tail], Acc) ->
	%% We don't want to create a sublist so we list the
	%% values in reverse order so that it gets reversed properly.
	headers_to_iolist(Tail, [<<"\r\n">>, V, <<": ">>, N|Acc]).

%% @doc Return the closing delimiter of the multipart message.

-spec close(binary()) -> iodata().
close(Boundary) ->
	[<<"\r\n--">>, Boundary, <<"--">>].

-ifdef(TEST).
%% Rebuild ?TEST1_MIME and compare it case-insensitively.
build_test() ->
	Result = string:to_lower(binary_to_list(?TEST1_MIME)),
	Result = string:to_lower(binary_to_list(iolist_to_binary([
		<<"This is a message with multiple parts in MIME format.\r\n">>,
		first_part(?TEST1_BOUNDARY, [{<<"content-type">>, <<"text/plain">>}]),
		<<"This is the body of the message.">>,
		part(?TEST1_BOUNDARY, [
			{<<"content-type">>, <<"application/octet-stream">>},
			{<<"content-transfer-encoding">>, <<"base64">>}]),
		<<"PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg\r\n"
			"Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==">>,
		close(?TEST1_BOUNDARY)
	]))),
	ok.

%% Build a message with a random boundary and parse it back.
identity_test() ->
	B = boundary(),
	Preamble = <<"This is a message with multiple parts in MIME format.">>,
	H1 = [{<<"content-type">>, <<"text/plain">>}],
	Body1 = <<"This is the body of the message.">>,
	H2 = lists:sort([{<<"content-type">>, <<"application/octet-stream">>},
		{<<"content-transfer-encoding">>, <<"base64">>}]),
	Body2 = <<"PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg\r\n"
		"Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==">>,
	Epilogue = <<"Gotta go fast!">>,
	M = iolist_to_binary([
		Preamble,
		part(B, H1), Body1,
		part(B, H2), Body2,
		close(B),
		Epilogue
	]),
	%% The preamble is retrieved by parsing it as if it was a body.
	{done, Preamble, M2} = parse_body(M, B),
	{ok, H1, M3} = parse_headers(M2, B),
	{done, Body1, M4} = parse_body(M3, B),
	{ok, H2Unsorted, M5} = parse_headers(M4, B),
	H2 = lists:sort(H2Unsorted),
	{done, Body2, M6} = parse_body(M5, B),
	{done, Epilogue} = parse_headers(M6, B),
	ok.

%% Build a two-part message as iodata; the result is not flattened.
perf_build_multipart() ->
	B = boundary(),
	[
		<<"preamble\r\n">>,
		first_part(B, [{<<"content-type">>, <<"text/plain">>}]),
		<<"This is the body of the message.">>,
		part(B, [
			{<<"content-type">>, <<"application/octet-stream">>},
			{<<"content-transfer-encoding">>, <<"base64">>}]),
		<<"PGh0bWw+CiAgPGhlYWQ+CiAgPC9oZWFkPgogIDxib2R5PgogICAgPHA+VGhpcyBpcyB0aGUg\r\n"
			"Ym9keSBvZiB0aGUgbWVzc2FnZS48L3A+CiAgPC9ib2R5Pgo8L2h0bWw+Cg==">>,
		close(B),
		<<"epilogue">>
	].

horse_build() ->
	horse:repeat(50000,
		perf_build_multipart()
	).
-endif.

%% Headers.

%% @doc Convenience function for extracting information from headers
%% when parsing a multipart/form-data stream.
%%
%% The headers must contain a content-disposition of type form-data
%% with a name parameter, otherwise a pattern match fails. When a filename
%% parameter is present the result is a file tuple, with the content-type
%% defaulting to text/plain and the transfer encoding to 7bit.

-spec form_data(headers())
	-> {data, binary()}
	| {file, binary(), binary(), binary(), binary()}.
form_data(Headers) ->
	{_, DispositionBin} = lists:keyfind(<<"content-disposition">>, 1, Headers),
	{<<"form-data">>, Params} = parse_content_disposition(DispositionBin),
	{_, FieldName} = lists:keyfind(<<"name">>, 1, Params),
	case lists:keyfind(<<"filename">>, 1, Params) of
		false ->
			{data, FieldName};
		{_, Filename} ->
			Type = case lists:keyfind(<<"content-type">>, 1, Headers) of
				false -> <<"text/plain">>;
				{_, T} -> T
			end,
			TransferEncoding = case lists:keyfind(
					<<"content-transfer-encoding">>, 1, Headers) of
				false -> <<"7bit">>;
				{_, TE} -> TE
			end,
			{file, FieldName, Filename, Type, TransferEncoding}
	end.

-ifdef(TEST).
form_data_test_() ->
	Tests = [
		{[{<<"content-disposition">>, <<"form-data; name=\"submit-name\"">>}],
			{data, <<"submit-name">>}},
		{[{<<"content-disposition">>,
				<<"form-data; name=\"files\"; filename=\"file1.txt\"">>},
			{<<"content-type">>, <<"text/x-plain">>}],
			{file, <<"files">>, <<"file1.txt">>,
				<<"text/x-plain">>, <<"7bit">>}}
	],
	[{lists:flatten(io_lib:format("~p", [V])),
		fun() -> R = form_data(V) end} || {V, R} <- Tests].
-endif.

%% @todo parse_content_description
%% @todo parse_content_id

%% @doc Parse an RFC 2183 content-disposition value.
%% @todo Support RFC 2231.

-spec parse_content_disposition(binary())
	-> {binary(), [{binary(), binary()}]}.
parse_content_disposition(Bin) ->
	parse_cd_type(Bin, <<>>).

%% Read the disposition type up to the first ';', space or tab,
%% then parse the parameters. Other bytes go through ?LOWER.
parse_cd_type(<<>>, Acc) ->
	{Acc, []};
parse_cd_type(<< C, Rest/bits >>, Acc) ->
	case C of
		$; -> {Acc, parse_before_param(Rest, [])};
		$\s -> {Acc, parse_before_param(Rest, [])};
		$\t -> {Acc, parse_before_param(Rest, [])};
		_ -> ?LOWER(parse_cd_type, Rest, Acc)
	end.

-ifdef(TEST).
%% Each entry is {Input, ExpectedResult}.
parse_content_disposition_test_() ->
	Tests = [
		{<<"inline">>, {<<"inline">>, []}},
		{<<"attachment">>, {<<"attachment">>, []}},
		{<<"attachment; filename=genome.jpeg;"
			" modification-date=\"Wed, 12 Feb 1997 16:29:51 -0500\";">>,
			{<<"attachment">>, [
				{<<"filename">>, <<"genome.jpeg">>},
				{<<"modification-date">>, <<"Wed, 12 Feb 1997 16:29:51 -0500">>}
			]}},
		{<<"form-data; name=\"user\"">>,
			{<<"form-data">>, [{<<"name">>, <<"user">>}]}},
		{<<"form-data; NAME=\"submit-name\"">>,
			{<<"form-data">>, [{<<"name">>, <<"submit-name">>}]}},
		{<<"form-data; name=\"files\"; filename=\"file1.txt\"">>,
			{<<"form-data">>, [
				{<<"name">>, <<"files">>},
				{<<"filename">>, <<"file1.txt">>}
			]}},
		{<<"file; filename=\"file1.txt\"">>,
			{<<"file">>, [{<<"filename">>, <<"file1.txt">>}]}},
		{<<"file; filename=\"file2.gif\"">>,
			{<<"file">>, [{<<"filename">>, <<"file2.gif">>}]}}
	],
	[{V, fun() -> R = parse_content_disposition(V) end} || {V, R} <- Tests].

horse_parse_content_disposition_attachment() ->
	horse:repeat(100000,
		parse_content_disposition(<<"attachment; filename=genome.jpeg;"
			" modification-date=\"Wed, 12 Feb 1997 16:29:51 -0500\";">>)
	).

horse_parse_content_disposition_form_data() ->
	horse:repeat(100000,
		parse_content_disposition(
			<<"form-data; name=\"files\"; filename=\"file1.txt\"">>)
	).

horse_parse_content_disposition_inline() ->
	horse:repeat(100000,
		parse_content_disposition(<<"inline">>)
	).
-endif.

%% @doc Parse an RFC 2045 content-transfer-encoding header.
%%
%% The value is passed through the ?LOWER macro; the tests below
%% expect the lowercase form of the input.

-spec parse_content_transfer_encoding(binary()) -> binary().
parse_content_transfer_encoding(Bin) ->
	?LOWER(Bin).

-ifdef(TEST).
%% Each entry is {Input, ExpectedResult}.
parse_content_transfer_encoding_test_() ->
	Tests = [
		{<<"7bit">>, <<"7bit">>},
		{<<"7BIT">>, <<"7bit">>},
		{<<"8bit">>, <<"8bit">>},
		{<<"binary">>, <<"binary">>},
		{<<"quoted-printable">>, <<"quoted-printable">>},
		{<<"base64">>, <<"base64">>},
		{<<"Base64">>, <<"base64">>},
		{<<"BASE64">>, <<"base64">>},
		{<<"bAsE64">>, <<"base64">>}
	],
	[{V, fun() -> R = parse_content_transfer_encoding(V) end}
		|| {V, R} <- Tests].

horse_parse_content_transfer_encoding() ->
	horse:repeat(100000,
		parse_content_transfer_encoding(<<"QUOTED-PRINTABLE">>)
	).
-endif.

%% @doc Parse an RFC 2045 content-type header.
%%
%% Returns {Type, Subtype, Params}. An input without a '/' or with
%% an empty subtype has no matching clause.

-spec parse_content_type(binary())
	-> {binary(), binary(), [{binary(), binary()}]}.
parse_content_type(Bin) ->
	parse_ct_type(Bin, <<>>).

%% Read the type up to the '/'.
parse_ct_type(<< C, Rest/bits >>, Acc) ->
	case C of
		$/ -> parse_ct_subtype(Rest, Acc, <<>>);
		_ -> ?LOWER(parse_ct_type, Rest, Acc)
	end.

%% Read the subtype up to the end of the input or to the first
%% ';', space or tab, after which the parameters are parsed.
parse_ct_subtype(<<>>, Type, Subtype) when Subtype =/= <<>> ->
	{Type, Subtype, []};
parse_ct_subtype(<< C, Rest/bits >>, Type, Acc) ->
	case C of
		$; -> {Type, Acc, parse_before_param(Rest, [])};
		$\s -> {Type, Acc, parse_before_param(Rest, [])};
		$\t -> {Type, Acc, parse_before_param(Rest, [])};
		_ -> ?LOWER(parse_ct_subtype, Rest, Type, Acc)
	end.

-ifdef(TEST).
%% Each entry is {Input, ExpectedResult}.
parse_content_type_test_() ->
	Tests = [
		{<<"image/gif">>,
			{<<"image">>, <<"gif">>, []}},
		{<<"text/plain">>,
			{<<"text">>, <<"plain">>, []}},
		{<<"text/plain; charset=us-ascii">>,
			{<<"text">>, <<"plain">>, [{<<"charset">>, <<"us-ascii">>}]}},
		{<<"text/plain; charset=\"us-ascii\"">>,
			{<<"text">>, <<"plain">>, [{<<"charset">>, <<"us-ascii">>}]}},
		{<<"multipart/form-data; boundary=AaB03x">>,
			{<<"multipart">>, <<"form-data">>,
				[{<<"boundary">>, <<"AaB03x">>}]}},
		{<<"multipart/mixed; boundary=BbC04y">>,
			{<<"multipart">>, <<"mixed">>, [{<<"boundary">>, <<"BbC04y">>}]}},
		{<<"multipart/mixed; boundary=--------">>,
			{<<"multipart">>, <<"mixed">>, [{<<"boundary">>, <<"--------">>}]}},
		{<<"application/x-horse; filename=genome.jpeg;"
			" some-date=\"Wed, 12 Feb 1997 16:29:51 -0500\";"
			" charset=us-ascii; empty=; number=12345">>,
			{<<"application">>, <<"x-horse">>, [
				{<<"filename">>, <<"genome.jpeg">>},
				{<<"some-date">>, <<"Wed, 12 Feb 1997 16:29:51 -0500">>},
				{<<"charset">>, <<"us-ascii">>},
				{<<"empty">>, <<>>},
				{<<"number">>, <<"12345">>}
			]}}
	],
	[{V, fun() -> R = parse_content_type(V) end}
		|| {V, R} <- Tests].

horse_parse_content_type_zero() ->
	horse:repeat(100000,
		parse_content_type(<<"text/plain">>)
	).

horse_parse_content_type_one() ->
	horse:repeat(100000,
		parse_content_type(<<"text/plain; charset=\"us-ascii\"">>)
	).

horse_parse_content_type_five() ->
	horse:repeat(100000,
		parse_content_type(<<"application/x-horse; filename=genome.jpeg;"
			" some-date=\"Wed, 12 Feb 1997 16:29:51 -0500\";"
			" charset=us-ascii; empty=; number=12345">>)
	).
-endif.

%% @doc Parse RFC 2045 parameters.

%% Skip the separators (';', space, tab) found before a parameter name.
%% The first other byte starts the name. The parameters are returned
%% in their order of appearance.
parse_before_param(<<>>, Params) ->
	lists:reverse(Params);
parse_before_param(<< C, Rest/bits >>, Params) ->
	case C of
		$; -> parse_before_param(Rest, Params);
		$\s -> parse_before_param(Rest, Params);
		$\t -> parse_before_param(Rest, Params);
		_ -> ?LOWER(parse_param_name, Rest, Params, <<>>)
	end.

%% Read a parameter name up to the '='. A name that runs to the end
%% of the input is stored with an empty value.
parse_param_name(<<>>, Params, Name) ->
	lists:reverse([{Name, <<>>}|Params]);
parse_param_name(<< $=, Rest/bits >>, Params, Name) ->
	parse_param_value(Rest, Params, Name);
parse_param_name(<< C, Rest/bits >>, Params, Name) ->
	%% ?LOWER reads the current byte from C.
	?LOWER(parse_param_name, Rest, Params, Name).

%% First byte of a parameter value: decides between a quoted value,
%% an empty value and a token.
parse_param_value(<<>>, Params, Name) ->
	lists:reverse([{Name, <<>>}|Params]);
parse_param_value(<< $", Rest/bits >>, Params, Name) ->
	parse_param_quoted_value(Rest, Params, Name, <<>>);
parse_param_value(<< Sep, Rest/bits >>, Params, Name)
		when Sep =:= $;; Sep =:= $\s; Sep =:= $\t ->
	parse_before_param(Rest, [{Name, <<>>}|Params]);
parse_param_value(<< C, Rest/bits >>, Params, Name) ->
	parse_param_value(Rest, Params, Name, << C >>).

%% Remaining bytes of a token value, which ends at a separator
%% or at the end of the input.
parse_param_value(<<>>, Params, Name, Value) ->
	lists:reverse([{Name, Value}|Params]);
parse_param_value(<< Sep, Rest/bits >>, Params, Name, Value)
		when Sep =:= $;; Sep =:= $\s; Sep =:= $\t ->
	parse_before_param(Rest, [{Name, Value}|Params]);
parse_param_value(<< C, Rest/bits >>, Params, Name, Value) ->
	parse_param_value(Rest, Params, Name, << Value/binary, C >>).

%% We expect a final $" so no need to test for <<>>.
%% A backslash escapes the byte that follows it; a bare \r is rejected.
parse_param_quoted_value(<< $\\, Escaped, Rest/bits >>, Params, Name, Value) ->
	parse_param_quoted_value(Rest, Params, Name, << Value/binary, Escaped >>);
parse_param_quoted_value(<< $", Rest/bits >>, Params, Name, Value) ->
	parse_before_param(Rest, [{Name, Value}|Params]);
parse_param_quoted_value(<< C, Rest/bits >>, Params, Name, Value)
		when C =/= $\r ->
	parse_param_quoted_value(Rest, Params, Name, << Value/binary, C >>).
diff --git a/src/cow_qs.erl b/src/cow_qs.erl index 5e338b2..33d385b 100644 --- a/src/cow_qs.erl +++ b/src/cow_qs.erl @@ -1,4 +1,4 @@ -%% Copyright (c) 2013, Loïc Hoguin <[email protected]> +%% Copyright (c) 2013-2015, Loïc Hoguin <[email protected]> %% %% Permission to use, copy, modify, and/or distribute this software for any %% purpose with or without fee is hereby granted, provided that the above @@ -126,9 +126,7 @@ parse_qs_identity_test_() -> "b-sid=521732&ortb-xt=IAB3&ortb-ugc=">> ], [{V, fun() -> V = qs(parse_qs(V)) end} || V <- Tests]. --endif. --ifdef(PERF). horse_parse_qs_shorter() -> horse:repeat(20000, parse_qs(<<"hl=en&q=erlang%20cowboy">>) @@ -310,9 +308,7 @@ qs_identity_test_() -> [{lists:flatten(io_lib:format("~p", [V])), fun() -> V = parse_qs(qs(V)) end} || V <- Tests]. --endif. --ifdef(PERF). horse_qs_shorter() -> horse:repeat(20000, qs(?QS_SHORTER)). @@ -395,9 +391,7 @@ urldecode_identity_test_() -> "%BE%8B%E3%80%9C">> ], [{V, fun() -> V = urlencode(urldecode(V)) end} || V <- Tests]. --endif. --ifdef(PERF). horse_urldecode() -> horse:repeat(100000, urldecode(<<"nothingnothingnothingnothing">>) @@ -544,9 +538,7 @@ urlencode_identity_test_() -> 129,153,227,130,139,230,151,139,229,190,139,227,128,156>> ], [{V, fun() -> V = urldecode(urlencode(V)) end} || V <- Tests]. --endif. --ifdef(PERF). 
horse_urlencode() -> horse:repeat(100000, urlencode(<<"nothingnothingnothingnothing">>) diff --git a/src/cow_spdy.erl b/src/cow_spdy.erl index 8484180..a906ddf 100644 --- a/src/cow_spdy.erl +++ b/src/cow_spdy.erl @@ -1,4 +1,4 @@ -%% Copyright (c) 2013, Loïc Hoguin <[email protected]> +%% Copyright (c) 2013-2015, Loïc Hoguin <[email protected]> %% %% Permission to use, copy, modify, and/or distribute this software for any %% purpose with or without fee is hereby granted, provided that the above @@ -169,7 +169,7 @@ parse_headers(<< L1:32, Key:L1/binary, L2:32, Value:L2/binary, Rest/bits >>, case Key of << $:, _/bits >> -> parse_headers(Rest, NbHeaders - 1, Acc, - [{Key, Value}|lists:keydelete(Key, 1, SpAcc)]); + lists:keystore(Key, 1, SpAcc, {Key, Value})); _ -> parse_headers(Rest, NbHeaders - 1, [{Key, Value}|Acc], SpAcc) end. diff --git a/src/cow_ws.erl b/src/cow_ws.erl new file mode 100644 index 0000000..c89c17a --- /dev/null +++ b/src/cow_ws.erl @@ -0,0 +1,599 @@ +%% Copyright (c) 2015, Loïc Hoguin <[email protected]> +%% +%% Permission to use, copy, modify, and/or distribute this software for any +%% purpose with or without fee is hereby granted, provided that the above +%% copyright notice and this permission notice appear in all copies. +%% +%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +-module(cow_ws). + +-export([key/0]). +-export([encode_key/1]). + +-export([negotiate_permessage_deflate/3]). +-export([negotiate_x_webkit_deflate_frame/3]). + +-export([validate_permessage_deflate/3]). 

-export([parse_header/3]).
-export([parse_payload/9]).
-export([make_frame/4]).

-export([frame/2]).
-export([masked_frame/2]).

-type close_code() :: 1000..1003 | 1006..1011 | 3000..4999.
-export_type([close_code/0]).

-type extensions() :: map().
-export_type([extensions/0]).

-type frag_state() :: undefined | {fin | nofin, text | binary, rsv()}.
-export_type([frag_state/0]).

-type frame() :: close | ping | pong
	| {text | binary | close | ping | pong, iodata()}
	| {close, close_code(), iodata()}
	| {fragment, fin | nofin, text | binary | continuation, iodata()}.
-export_type([frame/0]).

-type frame_type() :: fragment | text | binary | close | ping | pong.
-export_type([frame_type/0]).

-type mask_key() :: undefined | 0..16#ffffffff.
-export_type([mask_key/0]).

-type rsv() :: <<_:3>>.
-export_type([rsv/0]).

-type utf8_state() :: 0..8.
-export_type([utf8_state/0]).

%% @doc Generate a key for the Websocket handshake request.
%%
%% The key is the base64 encoding of 16 random bytes.

-spec key() -> binary().
key() ->
	%% crypto:rand_bytes/1 is deprecated and was removed in OTP 20;
	%% crypto:strong_rand_bytes/1 is the supported replacement.
	base64:encode(crypto:strong_rand_bytes(16)).

%% @doc Encode the key into the accept value for the Websocket handshake response.

-spec encode_key(binary()) -> binary().
encode_key(Key) ->
	base64:encode(crypto:hash(sha, [Key, "258EAFA5-E914-47DA-95CA-C5AB0DC85B11"])).

%% @doc Negotiate the permessage-deflate extension.
%%
%% Returns ignore when the offer cannot be accepted, or
%% {ok, ResponseExtension, Extensions} with the zlib contexts and
%% takeover settings stored in the Extensions map.

%% Ignore if deflate already negotiated.
negotiate_permessage_deflate(_, #{deflate := _}, _) ->
	ignore;
negotiate_permessage_deflate(Params, Extensions, Opts) ->
	case lists:usort(Params) of
		%% Ignore if multiple parameters with the same name.
		Params2 when length(Params) =/= length(Params2) ->
			ignore;
		Params2 ->
			%% @todo Might want to make these configurable defaults.
			case parse_request_permessage_deflate_params(Params2, 15, takeover, 15, takeover, []) of
				ignore ->
					ignore;
				{ClientWindowBits, ClientTakeOver, ServerWindowBits, ServerTakeOver, RespParams} ->
					{Inflate, Deflate} = init_permessage_deflate(ClientWindowBits, ServerWindowBits, Opts),
					{ok, [<<"permessage-deflate">>, RespParams],
						Extensions#{
							deflate => Deflate,
							deflate_takeover => ServerTakeOver,
							inflate => Inflate,
							inflate_takeover => ClientTakeOver}}
			end
	end.

%% Walk the offered parameters, updating the client/server window bits
%% (CB/SB) and takeover settings (CTO/STO) while building the parameters
%% of the response.
parse_request_permessage_deflate_params([], CB, CTO, SB, STO, RespParams) ->
	{CB, CTO, SB, STO, RespParams};
parse_request_permessage_deflate_params([<<"client_max_window_bits">>|Tail], CB, CTO, SB, STO, RespParams) ->
	parse_request_permessage_deflate_params(Tail, CB, CTO, SB, STO,
		[<<"; ">>, <<"client_max_window_bits=">>, integer_to_binary(CB)|RespParams]);
parse_request_permessage_deflate_params([{<<"client_max_window_bits">>, Max}|Tail], _, CTO, SB, STO, RespParams) ->
	case parse_max_window_bits(Max) of
		error ->
			ignore;
		CB ->
			parse_request_permessage_deflate_params(Tail, CB, CTO, SB, STO,
				[<<"; ">>, <<"client_max_window_bits=">>, Max|RespParams])
	end;
parse_request_permessage_deflate_params([<<"client_no_context_takeover">>|Tail], CB, _, SB, STO, RespParams) ->
	parse_request_permessage_deflate_params(Tail, CB, no_takeover, SB, STO, [<<"; ">>, <<"client_no_context_takeover">>|RespParams]);
parse_request_permessage_deflate_params([{<<"server_max_window_bits">>, Max}|Tail], CB, CTO, _, STO, RespParams) ->
	case parse_max_window_bits(Max) of
		error ->
			ignore;
		SB ->
			parse_request_permessage_deflate_params(Tail, CB, CTO, SB, STO,
				[<<"; ">>, <<"server_max_window_bits=">>, Max|RespParams])
	end;
parse_request_permessage_deflate_params([<<"server_no_context_takeover">>|Tail], CB, CTO, SB, _, RespParams) ->
	parse_request_permessage_deflate_params(Tail, CB, CTO, SB, no_takeover, [<<"; ">>, <<"server_no_context_takeover">>|RespParams]);
%% Ignore if unknown parameter; ignore if parameter with invalid or missing value.
parse_request_permessage_deflate_params(_, _, _, _, _, _) ->
	ignore.

%% Only the decimal values 8 to 15 are accepted.
parse_max_window_bits(<<"8">>) -> 8;
parse_max_window_bits(<<"9">>) -> 9;
parse_max_window_bits(<<"10">>) -> 10;
parse_max_window_bits(<<"11">>) -> 11;
parse_max_window_bits(<<"12">>) -> 12;
parse_max_window_bits(<<"13">>) -> 13;
parse_max_window_bits(<<"14">>) -> 14;
parse_max_window_bits(<<"15">>) -> 15;
parse_max_window_bits(_) -> error.

%% Open and initialize one zlib stream for inflating and one for
%% deflating. The compression level, memory level and strategy come from
%% Opts (level, mem_level, strategy) with defaults best_compression, 8
%% and default.
% A negative WindowBits value indicates that zlib headers are not used.
init_permessage_deflate(InflateWindowBits, DeflateWindowBits, Opts) ->
	Inflate = zlib:open(),
	ok = zlib:inflateInit(Inflate, -InflateWindowBits),
	Deflate = zlib:open(),
	%% @todo Remove this case .. of for OTP 18+ if PR https://github.com/erlang/otp/pull/633 gets merged.
	DeflateWindowBits2 = case DeflateWindowBits of
		8 -> 9;
		_ -> DeflateWindowBits
	end,
	ok = zlib:deflateInit(Deflate,
		maps:get(level, Opts, best_compression),
		deflated,
		-DeflateWindowBits2,
		maps:get(mem_level, Opts, 8),
		maps:get(strategy, Opts, default)),
	{Inflate, Deflate}.

%% @doc Negotiate the x-webkit-deflate-frame extension.
%%
%% The implementation is very basic and none of the parameters
%% are currently supported.

negotiate_x_webkit_deflate_frame(_, #{deflate := _}, _) ->
	ignore;
negotiate_x_webkit_deflate_frame(_Params, Extensions, Opts) ->
	% Since we are negotiating an unconstrained deflate-frame
	% then we must be willing to accept frames using the
	% maximum window size which is 2^15.
	{Inflate, Deflate} = init_permessage_deflate(15, 15, Opts),
	{ok, <<"x-webkit-deflate-frame">>,
		Extensions#{
			deflate => Deflate,
			deflate_takeover => takeover,
			inflate => Inflate,
			inflate_takeover => takeover}}.

%% @doc Validate the negotiated permessage-deflate extension.

%% Error when more than one deflate extension was negotiated.
%% Returns error when the response cannot be accepted, or {ok, Extensions}
%% with the zlib contexts and takeover settings stored in the map. Unlike
%% the negotiate function, the inflate context uses the server window bits
%% and the deflate context the client window bits.
validate_permessage_deflate(_, #{deflate := _}, _) ->
	error;
validate_permessage_deflate(Params, Extensions, Opts) ->
	case lists:usort(Params) of
		%% Error if multiple parameters with the same name.
		Params2 when length(Params) =/= length(Params2) ->
			error;
		Params2 ->
			%% @todo Might want to make some of these configurable defaults if at all possible.
			case parse_response_permessage_deflate_params(Params2, 15, takeover, 15, takeover) of
				error ->
					error;
				{ClientWindowBits, ClientTakeOver, ServerWindowBits, ServerTakeOver} ->
					{Inflate, Deflate} = init_permessage_deflate(ServerWindowBits, ClientWindowBits, Opts),
					{ok, Extensions#{
						deflate => Deflate,
						deflate_takeover => ClientTakeOver,
						inflate => Inflate,
						inflate_takeover => ServerTakeOver}}
			end
	end.

%% Walk the parameters of the response, updating the client/server
%% window bits (CB/SB) and takeover settings (CTO/STO).
parse_response_permessage_deflate_params([], CB, CTO, SB, STO) ->
	{CB, CTO, SB, STO};
parse_response_permessage_deflate_params([{<<"client_max_window_bits">>, Max}|Tail], _, CTO, SB, STO) ->
	case parse_max_window_bits(Max) of
		error -> error;
		CB -> parse_response_permessage_deflate_params(Tail, CB, CTO, SB, STO)
	end;
parse_response_permessage_deflate_params([<<"client_no_context_takeover">>|Tail], CB, _, SB, STO) ->
	parse_response_permessage_deflate_params(Tail, CB, no_takeover, SB, STO);
parse_response_permessage_deflate_params([{<<"server_max_window_bits">>, Max}|Tail], CB, CTO, _, STO) ->
	case parse_max_window_bits(Max) of
		error -> error;
		SB -> parse_response_permessage_deflate_params(Tail, CB, CTO, SB, STO)
	end;
parse_response_permessage_deflate_params([<<"server_no_context_takeover">>|Tail], CB, CTO, SB, _) ->
	parse_response_permessage_deflate_params(Tail, CB, CTO, SB, no_takeover);
%% Error if unknown parameter; error if parameter with invalid or missing value.
parse_response_permessage_deflate_params(_, _, _, _, _) ->
	error.

%% @doc Parse and validate the Websocket frame header.
%%
%% This function also updates the fragmentation state according to
%% information found in the frame's header.

-spec parse_header(binary(), extensions(), frag_state())
	-> error | more | {frame_type(), frag_state(), rsv(), non_neg_integer(), mask_key(), binary()}.
%% RSV bits MUST be 0 unless an extension is negotiated
%% that defines meanings for non-zero values.
parse_header(<< _:1, Rsv:3, _/bits >>, Extensions, _) when Extensions =:= #{}, Rsv =/= 0 -> error;
%% Last 2 RSV bits MUST be 0 if deflate-frame extension is used.
parse_header(<< _:2, 1:1, _/bits >>, #{deflate := _}, _) -> error;
parse_header(<< _:3, 1:1, _/bits >>, #{deflate := _}, _) -> error;
%% Invalid opcode (3..7 and 11..15). Note that these opcodes may be used by extensions.
parse_header(<< _:4, Opcode:4, _/bits >>, _, _) when Opcode >= 3, Opcode =< 7; Opcode >= 11 -> error;
%% Control frames MUST NOT be fragmented.
parse_header(<< 0:1, _:3, Opcode:4, _/bits >>, _, _) when Opcode >= 8 -> error;
%% A frame MUST NOT use the zero opcode unless fragmentation was initiated.
parse_header(<< _:4, 0:4, _/bits >>, _, undefined) -> error;
%% Non-control opcode when expecting control message or next fragment.
%% Only text (1) and binary (2) can reach this clause: opcodes 3..7
%% have already been rejected as invalid above.
parse_header(<< _:4, Opcode:4, _/bits >>, _, {_, _, _}) when Opcode =:= 1; Opcode =:= 2 -> error;
%% Close control frame length MUST be 0 or >= 2.
parse_header(<< _:4, 8:4, _:1, 1:7, _/bits >>, _, _) -> error;
%% Close control frame with incomplete close code. Need more data.
%% (2 header bytes + 2 close code bytes, plus 4 mask key bytes when masked.)
parse_header(Data = << _:4, 8:4, 0:1, Len:7, _/bits >>, _, _) when Len > 1, byte_size(Data) < 4 -> more;
parse_header(Data = << _:4, 8:4, 1:1, Len:7, _/bits >>, _, _) when Len > 1, byte_size(Data) < 8 -> more;
%% 7 bits payload length.
parse_header(<< Fin:1, Rsv:3/bits, Opcode:4, 0:1, Len:7, Rest/bits >>, _, FragState) when Len < 126 ->
	parse_header(Opcode, Fin, FragState, Rsv, Len, undefined, Rest);
parse_header(<< Fin:1, Rsv:3/bits, Opcode:4, 1:1, Len:7, MaskKey:32, Rest/bits >>, _, FragState) when Len < 126 ->
	parse_header(Opcode, Fin, FragState, Rsv, Len, MaskKey, Rest);
%% 16 bits payload length.
parse_header(<< Fin:1, Rsv:3/bits, Opcode:4, 0:1, 126:7, Len:16, Rest/bits >>, _, FragState) when Len > 125, Opcode < 8 ->
	parse_header(Opcode, Fin, FragState, Rsv, Len, undefined, Rest);
parse_header(<< Fin:1, Rsv:3/bits, Opcode:4, 1:1, 126:7, Len:16, MaskKey:32, Rest/bits >>, _, FragState) when Len > 125, Opcode < 8 ->
	parse_header(Opcode, Fin, FragState, Rsv, Len, MaskKey, Rest);
%% 63 bits payload length.
parse_header(<< Fin:1, Rsv:3/bits, Opcode:4, 0:1, 127:7, 0:1, Len:63, Rest/bits >>, _, FragState) when Len > 16#ffff, Opcode < 8 ->
	parse_header(Opcode, Fin, FragState, Rsv, Len, undefined, Rest);
parse_header(<< Fin:1, Rsv:3/bits, Opcode:4, 1:1, 127:7, 0:1, Len:63, MaskKey:32, Rest/bits >>, _, FragState) when Len > 16#ffff, Opcode < 8 ->
	parse_header(Opcode, Fin, FragState, Rsv, Len, MaskKey, Rest);
%% When payload length is over 63 bits, the most significant bit MUST be 0.
parse_header(<< _:9, 127:7, 1:1, _/bits >>, _, _) -> error;
%% For the next two clauses, it can be one of the following:
%%
%% * The minimal number of bytes MUST be used to encode the length
%% * All control frames MUST have a payload length of 125 bytes or less
parse_header(<< _:8, 0:1, 126:7, _:16, _/bits >>, _, _) -> error;
parse_header(<< _:8, 1:1, 126:7, _:48, _/bits >>, _, _) -> error;
parse_header(<< _:8, 0:1, 127:7, _:64, _/bits >>, _, _) -> error;
parse_header(<< _:8, 1:1, 127:7, _:96, _/bits >>, _, _) -> error;
%% Need more data.
parse_header(_, _, _) -> more.

%% Build the parse_header/3 result from the decoded header fields.
%% A frame without the FIN bit is always reported as a fragment.
parse_header(Opcode, Fin, FragState, Rsv, Len, MaskKey, Rest) ->
	Type = opcode_to_frame_type(Opcode),
	Type2 = case Fin of
		0 -> fragment;
		1 -> Type
	end,
	{Type2, frag_state(Type, Fin, Rsv, FragState), Rsv, Len, MaskKey, Rest}.

%% Map a valid opcode to its frame type.
opcode_to_frame_type(0) -> fragment;
opcode_to_frame_type(1) -> text;
opcode_to_frame_type(2) -> binary;
opcode_to_frame_type(8) -> close;
opcode_to_frame_type(9) -> ping;
opcode_to_frame_type(10) -> pong.

%% Compute the new fragmentation state.
%%
%% A first frame without FIN starts a fragmented message and remembers
%% its type and RSV bits; a continuation without FIN keeps the state;
%% a continuation with FIN marks the message as finished; any other
%% frame with FIN leaves the state untouched.
frag_state(Type, 0, Rsv, undefined) -> {nofin, Type, Rsv};
frag_state(fragment, 0, _, FragState = {nofin, _, _}) -> FragState;
frag_state(fragment, 1, _, {nofin, Type, Rsv}) -> {fin, Type, Rsv};
frag_state(_, 1, _, FragState) -> FragState.

%% @doc Parse and validate the frame's payload.
%%
%% Validation is only required for text and close frames which feature
%% a UTF-8 payload.

%% The return value is a 4-tuple `ok' / 3-tuple `more' for regular frames.
%% When parsing starts on a close frame the close code is inserted as the
%% second element of the tuple. Failures are reported as `{error, Reason}'.
-spec parse_payload(binary(), mask_key(), utf8_state(), non_neg_integer(),
		frame_type(), non_neg_integer(), frag_state(), extensions(), rsv())
	-> {ok, binary(), utf8_state(), binary()}
	| {ok, close_code(), binary(), utf8_state(), binary()}
	| {more, binary(), utf8_state()}
	| {more, close_code(), binary(), utf8_state()}
	| {error, badframe | badencoding}.
%% Empty last frame of compressed message.
parse_payload(Data, _, Utf8State, _, _, 0, {fin, _, << 1:1, 0:2 >>},
		#{inflate := Inflate, inflate_takeover := TakeOver}, _) ->
	%% Feed the stream trailer that the sender strips from the message.
	zlib:inflate(Inflate, << 0, 0, 255, 255 >>),
	case TakeOver of
		no_takeover -> zlib:inflateReset(Inflate);
		takeover -> ok
	end,
	{ok, <<>>, Utf8State, Data};
%% Compressed fragmented frame.
parse_payload(Data, MaskKey, Utf8State, ParsedLen, Type, Len, FragState = {_, _, << 1:1, 0:2 >>},
		#{inflate := Inflate, inflate_takeover := TakeOver}, _) ->
	{Data2, Rest, Eof} = split_payload(Data, Len),
	Payload = inflate_frame(unmask(Data2, MaskKey, ParsedLen), Inflate, TakeOver, FragState, Eof),
	validate_payload(Payload, Rest, Utf8State, ParsedLen, Type, FragState, Eof);
%% Compressed frame.
parse_payload(Data, MaskKey, Utf8State, ParsedLen, Type, Len, FragState,
		#{inflate := Inflate, inflate_takeover := TakeOver}, << 1:1, 0:2 >>) when Type =:= text; Type =:= binary ->
	{Data2, Rest, Eof} = split_payload(Data, Len),
	Payload = inflate_frame(unmask(Data2, MaskKey, ParsedLen), Inflate, TakeOver, FragState, Eof),
	validate_payload(Payload, Rest, Utf8State, ParsedLen, Type, FragState, Eof);
%% Empty frame.
parse_payload(Data, _, Utf8State = 0, 0, _, 0, _, _, _) ->
	{ok, <<>>, Utf8State, Data};
%% Start of close frame.
parse_payload(Data, MaskKey, Utf8State, 0, Type = close, Len, FragState, _, << 0:3 >>) ->
	%% The first two payload bytes are the close code; the rest is the reason text.
	{<< MaskedCode:2/binary, Data2/bits >>, Rest, Eof} = split_payload(Data, Len),
	<< CloseCode:16 >> = unmask(MaskedCode, MaskKey, 0),
	case validate_close_code(CloseCode) of
		ok ->
			%% Two bytes were already unmasked, continue from the third mask byte.
			Payload = unmask(Data2, MaskKey, 2),
			case validate_payload(Payload, Rest, Utf8State, 2, Type, FragState, Eof) of
				{ok, _, Utf8State2, _} -> {ok, CloseCode, Payload, Utf8State2, Rest};
				{more, _, Utf8State2} -> {more, CloseCode, Payload, Utf8State2};
				Error -> Error
			end;
		error ->
			{error, badframe}
	end;
%% Normal frame.
parse_payload(Data, MaskKey, Utf8State, ParsedLen, Type, Len, FragState, _, << 0:3 >>) ->
	{Data2, Rest, Eof} = split_payload(Data, Len),
	Payload = unmask(Data2, MaskKey, ParsedLen),
	validate_payload(Payload, Rest, Utf8State, ParsedLen, Type, FragState, Eof).

%% Split Data into the part belonging to the current frame and the rest.
%% The third element tells whether Len bytes were available.
split_payload(Data, Len) ->
	case byte_size(Data) of
		Len ->
			{Data, <<>>, true};
		DataLen when DataLen < Len ->
			{Data, <<>>, false};
		_ ->
			<< Data2:Len/binary, Rest/bits >> = Data,
			{Data2, Rest, true}
	end.

%% Accept 1000..1011 except 1004, 1005 and 1006, and 3000..4999.
validate_close_code(Code) ->
	if
		Code < 1000 -> error;
		Code =:= 1004 -> error;
		Code =:= 1005 -> error;
		Code =:= 1006 -> error;
		Code > 1011, Code < 3000 -> error;
		Code > 4999 -> error;
		true -> ok
	end.

%% Unmask Data given the mask key and the number of payload bytes
%% that were already unmasked. An undefined key means no masking.
unmask(Data, undefined, _) ->
	Data;
unmask(Data, MaskKey, 0) ->
	mask(Data, MaskKey, <<>>);
%% We unmask on the fly so we need to continue from the right mask byte.
unmask(Data, MaskKey, UnmaskedLen) ->
	Left = UnmaskedLen rem 4,
	Right = 4 - Left,
	%% Rotate the key; bits above the low 32 are dropped when the
	%% key is written back with a :32 segment in mask/3.
	MaskKey2 = (MaskKey bsl (Left * 8)) + (MaskKey bsr (Right * 8)),
	mask(Data, MaskKey2, <<>>).

%% XOR Data with the 32-bit mask key, four bytes at a time.
%% The last 1 to 3 bytes are XORed with the leading bytes of the key.
mask(<<>>, _, Unmasked) ->
	Unmasked;
mask(<< Word:32, Tail/bits >>, MaskKey, Unmasked) ->
	Masked = Word bxor MaskKey,
	mask(Tail, MaskKey, << Unmasked/binary, Masked:32 >>);
mask(Tail, MaskKey, Unmasked) when is_binary(Tail) ->
	%% Tail is 1, 2 or 3 bytes long here: take as many
	%% leading bits from the key as there are bits left.
	Bits = bit_size(Tail),
	<< Partial:Bits >> = Tail,
	<< PartialKey:Bits, _/bits >> = << MaskKey:32 >>,
	Masked = Partial bxor PartialKey,
	<< Unmasked/binary, Masked:Bits >>.

%% Inflate the payload of a frame. When the frame data is complete and
%% the frame ends a message, the stream trailer is appended before
%% inflating and the context is reset if takeover is disabled.
inflate_frame(Data, Inflate, TakeOver, undefined, true) ->
	inflate_last_frame(Data, Inflate, TakeOver);
inflate_frame(Data, Inflate, TakeOver, FragState, true)
		when element(1, FragState) =:= fin ->
	inflate_last_frame(Data, Inflate, TakeOver);
inflate_frame(Data, Inflate, _, _, _) ->
	iolist_to_binary(zlib:inflate(Inflate, Data)).

inflate_last_frame(Data, Inflate, TakeOver) ->
	Inflated = zlib:inflate(Inflate, << Data/binary, 0, 0, 255, 255 >>),
	case TakeOver of
		no_takeover -> zlib:inflateReset(Inflate);
		takeover -> ok
	end,
	iolist_to_binary(Inflated).

%% Text frames and close control frames MUST have a payload that is valid UTF-8.
%% Incomplete sequences are only tolerated while more data is expected.
validate_payload(Payload, Rest, Utf8State, _, Type, _, Eof) when Type =:= text; Type =:= close ->
	case {validate_utf8(Payload, Utf8State), Eof} of
		{1, _} -> {error, badencoding};
		{NewState, false} -> {more, Payload, NewState};
		{0, true} -> {ok, Payload, 0, Rest};
		_ -> {error, badencoding}
	end;
%% Fragments of a text message: a sequence may be cut at the end of
%% a fragment as long as it is not the last one.
validate_payload(Payload, Rest, Utf8State, _, fragment, {Fin, text, _}, Eof) ->
	case {validate_utf8(Payload, Utf8State), Eof, Fin} of
		{1, _, _} -> {error, badencoding};
		{0, true, _} -> {ok, Payload, 0, Rest};
		{NewState, true, nofin} -> {ok, Payload, NewState, Rest};
		{NewState, false, _} -> {more, Payload, NewState};
		_ -> {error, badencoding}
	end;
%% Everything else is passed through unchecked.
validate_payload(Payload, _, Utf8State, _, _, _, false) ->
	{more, Payload, Utf8State};
validate_payload(Payload, Rest, Utf8State, _, _, _, true) ->
	{ok, Payload, Utf8State, Rest}.

%% Based on the Flexible and Economical UTF-8 Decoder algorithm by
%% Bjoern Hoehrmann <[email protected]> (http://bjoern.hoehrmann.de/utf-8/decoder/dfa/).
%%
%% The state machine is written out as one clause per (state, byte range)
%% pair, grouped by state: state 0 expects a lead byte, states 2..8
%% expect a continuation byte within the range allowed at that point.
%%
%% This function returns 0 on success, 1 on error, and 2..8 on incomplete data.
validate_utf8(<<>>, State) -> State;
%% State 0: lead byte.
validate_utf8(<< C, Rest/bits >>, 0) when C < 128 -> validate_utf8(Rest, 0);
validate_utf8(<< C, Rest/bits >>, 0) when C >= 194, C < 224 -> validate_utf8(Rest, 2);
validate_utf8(<< 224, Rest/bits >>, 0) -> validate_utf8(Rest, 4);
validate_utf8(<< C, Rest/bits >>, 0) when C >= 225, C < 237; C =:= 238; C =:= 239 -> validate_utf8(Rest, 3);
validate_utf8(<< 237, Rest/bits >>, 0) -> validate_utf8(Rest, 5);
validate_utf8(<< 240, Rest/bits >>, 0) -> validate_utf8(Rest, 6);
validate_utf8(<< C, Rest/bits >>, 0) when C >= 241, C =< 243 -> validate_utf8(Rest, 7);
validate_utf8(<< 244, Rest/bits >>, 0) -> validate_utf8(Rest, 8);
%% States 2..8: continuation byte.
validate_utf8(<< C, Rest/bits >>, 2) when C >= 128, C < 192 -> validate_utf8(Rest, 0);
validate_utf8(<< C, Rest/bits >>, 3) when C >= 128, C < 192 -> validate_utf8(Rest, 2);
validate_utf8(<< C, Rest/bits >>, 4) when C >= 160, C < 192 -> validate_utf8(Rest, 2);
validate_utf8(<< C, Rest/bits >>, 5) when C >= 128, C < 160 -> validate_utf8(Rest, 2);
validate_utf8(<< C, Rest/bits >>, 6) when C >= 144, C < 192 -> validate_utf8(Rest, 3);
validate_utf8(<< C, Rest/bits >>, 7) when C >= 128, C < 192 -> validate_utf8(Rest, 3);
validate_utf8(<< C, Rest/bits >>, 8) when C >= 128, C < 144 -> validate_utf8(Rest, 3);
%% Anything else is invalid.
validate_utf8(_, _) -> 1.

%% @doc Return a frame tuple from parsed state and data.

-spec make_frame(frame_type(), binary(), close_code(), frag_state()) -> frame().
%% Fragmented frame: the message type comes from the fragmentation state.
make_frame(fragment, Payload, _, {Fin, Type, _}) -> {fragment, Fin, Type, Payload};
%% Close frames carry their close code unless they are entirely empty.
make_frame(close, <<>>, undefined, _) -> close;
make_frame(close, Payload, CloseCode, _) -> {close, CloseCode, Payload};
%% Empty ping and pong frames are represented by a bare atom.
make_frame(Type, <<>>, _, _) when Type =:= ping; Type =:= pong -> Type;
make_frame(Type, Payload, _, _)
		when Type =:= text; Type =:= binary; Type =:= ping; Type =:= pong ->
	{Type, Payload}.

%% @doc Construct an unmasked Websocket frame.

-spec frame(frame(), extensions()) -> iodata().
%% Control frames. Control packets must not be > 125 in length.
%% Control frames without payload: FIN set, zero length.
frame(Type, _) when Type =:= close; Type =:= ping; Type =:= pong ->
	Opcode = case Type of
		close -> 8;
		ping -> 9;
		pong -> 10
	end,
	<< 1:1, 0:3, Opcode:4, 0:8 >>;
%% A close frame without an explicit code uses 1000.
frame({close, Payload}, Extensions) ->
	frame({close, 1000, Payload}, Extensions);
frame({close, StatusCode, Payload}, _) ->
	%% The close code takes two bytes of the payload.
	Size = iolist_size(Payload) + 2,
	true = Size =< 125,
	[<< 1:1, 0:3, 8:4, 0:1, Size:7, StatusCode:16 >>, Payload];
frame({Type, Payload}, _) when Type =:= ping; Type =:= pong ->
	Opcode = case Type of
		ping -> 9;
		pong -> 10
	end,
	Size = iolist_size(Payload),
	true = Size =< 125,
	[<< 1:1, 0:3, Opcode:4, 0:1, Size:7 >>, Payload];
%% Data frames, deflate-frame extension. RSV1 is set on compressed frames.
frame({Type, Payload}, #{deflate := Deflate, deflate_takeover := TakeOver})
		when Type =:= text; Type =:= binary ->
	Opcode = case Type of
		text -> 1;
		binary -> 2
	end,
	Compressed = deflate_frame(Payload, Deflate, TakeOver),
	LenBits = payload_length(Compressed),
	[<< 1:1, 1:1, 0:2, Opcode:4, 0:1, LenBits/bits >>, Compressed];
%% Data frames.
frame({Type, Payload}, _) when Type =:= text; Type =:= binary ->
	Opcode = case Type of
		text -> 1;
		binary -> 2
	end,
	LenBits = payload_length(Payload),
	[<< 1:1, 0:3, Opcode:4, 0:1, LenBits/bits >>, Payload].

%% @doc Construct a masked Websocket frame.
%%
%% We use a mask key of 0 if there is no payload for close, ping and pong frames.

-spec masked_frame(frame(), extensions()) -> iodata().
%% Control frames. Control packets must not be > 125 in length.
%% Empty control frames: MASK bit set, zero length, mask key of 0.
masked_frame(close, _) ->
	<< 1:1, 0:3, 8:4, 1:1, 0:39 >>;
masked_frame(ping, _) ->
	<< 1:1, 0:3, 9:4, 1:1, 0:39 >>;
masked_frame(pong, _) ->
	<< 1:1, 0:3, 10:4, 1:1, 0:39 >>;
%% A close frame without an explicit code uses 1000. This must go through
%% masked_frame/2 again, otherwise the resulting frame would be unmasked.
masked_frame({close, Payload}, Extensions) ->
	masked_frame({close, 1000, Payload}, Extensions);
masked_frame({close, StatusCode, Payload}, _) ->
	%% The close code takes two bytes of the payload and is masked with it.
	Len = 2 + iolist_size(Payload),
	true = Len =< 125,
	MaskKeyBin = << MaskKey:32 >> = crypto:strong_rand_bytes(4),
	[<< 1:1, 0:3, 8:4, 1:1, Len:7 >>, MaskKeyBin, mask(iolist_to_binary([<< StatusCode:16 >>, Payload]), MaskKey, <<>>)];
masked_frame({ping, Payload}, _) ->
	Len = iolist_size(Payload),
	true = Len =< 125,
	MaskKeyBin = << MaskKey:32 >> = crypto:strong_rand_bytes(4),
	[<< 1:1, 0:3, 9:4, 1:1, Len:7 >>, MaskKeyBin, mask(iolist_to_binary(Payload), MaskKey, <<>>)];
masked_frame({pong, Payload}, _) ->
	Len = iolist_size(Payload),
	true = Len =< 125,
	MaskKeyBin = << MaskKey:32 >> = crypto:strong_rand_bytes(4),
	[<< 1:1, 0:3, 10:4, 1:1, Len:7 >>, MaskKeyBin, mask(iolist_to_binary(Payload), MaskKey, <<>>)];
%% Data frames, deflate-frame extension. The payload is compressed first, then masked.
masked_frame({text, Payload}, #{deflate := Deflate, deflate_takeover := TakeOver}) ->
	MaskKeyBin = << MaskKey:32 >> = crypto:strong_rand_bytes(4),
	Payload2 = mask(deflate_frame(Payload, Deflate, TakeOver), MaskKey, <<>>),
	Len = payload_length(Payload2),
	[<< 1:1, 1:1, 0:2, 1:4, 1:1, Len/bits >>, MaskKeyBin, Payload2];
masked_frame({binary, Payload}, #{deflate := Deflate, deflate_takeover := TakeOver}) ->
	MaskKeyBin = << MaskKey:32 >> = crypto:strong_rand_bytes(4),
	Payload2 = mask(deflate_frame(Payload, Deflate, TakeOver), MaskKey, <<>>),
	Len = payload_length(Payload2),
	[<< 1:1, 1:1, 0:2, 2:4, 1:1, Len/bits >>, MaskKeyBin, Payload2];
%% Data frames.
masked_frame({text, Payload}, _) ->
	MaskKeyBin = << MaskKey:32 >> = crypto:strong_rand_bytes(4),
	Len = payload_length(Payload),
	[<< 1:1, 0:3, 1:4, 1:1, Len/bits >>, MaskKeyBin, mask(iolist_to_binary(Payload), MaskKey, <<>>)];
masked_frame({binary, Payload}, _) ->
	MaskKeyBin = << MaskKey:32 >> = crypto:strong_rand_bytes(4),
	Len = payload_length(Payload),
	[<< 1:1, 0:3, 2:4, 1:1, Len/bits >>, MaskKeyBin, mask(iolist_to_binary(Payload), MaskKey, <<>>)].

%% Encode the payload length field (without the MASK bit) as a bitstring:
%% 7 bits, 7+16 bits or 7+64 bits depending on the size of the payload.
%% iolist_size/1 is used because payloads may be binaries or iolists.
payload_length(Payload) ->
	case iolist_size(Payload) of
		N when N =< 125 -> << N:7 >>;
		N when N =< 16#ffff -> << 126:7, N:16 >>;
		N when N =< 16#7fffffffffffffff -> << 127:7, N:64 >>
	end.

%% Compress Payload with a sync flush and strip the trailing
%% 0, 0, 255, 255 bytes from the result when present. The deflate
%% context is reset afterwards when takeover is disabled.
deflate_frame(Payload, Deflate, TakeOver) ->
	Deflated = iolist_to_binary(zlib:deflate(Deflate, Payload, sync)),
	case TakeOver of
		no_takeover -> zlib:deflateReset(Deflate);
		takeover -> ok
	end,
	Len = byte_size(Deflated) - 4,
	case Deflated of
		<< Body:Len/binary, 0:8, 0:8, 255:8, 255:8 >> -> Body;
		_ -> Deflated
	end.
diff --git a/src/cowlib.app.src b/src/cowlib.app.src index 568f72b..5d358c2 100644 --- a/src/cowlib.app.src +++ b/src/cowlib.app.src @@ -1,4 +1,4 @@ -%% Copyright (c) 2013, Loïc Hoguin <[email protected]> +%% Copyright (c) 2013-2015, Loïc Hoguin <[email protected]> %% %% Permission to use, copy, modify, and/or distribute this software for any %% purpose with or without fee is hereby granted, provided that the above @@ -14,11 +14,13 @@ {application, cowlib, [ {description, "Support library for manipulating Web protocols."}, - {vsn, "0.4.0"}, + {vsn, "1.2.0"}, + {id, "git"}, {modules, []}, {registered, []}, {applications, [ kernel, - stdlib + stdlib, + crypto ]} ]}.
diff --git a/test/eunit_SUITE.erl b/test/eunit_SUITE.erl deleted file mode 100644 index e9d9282..0000000 --- a/test/eunit_SUITE.erl +++ /dev/null @@ -1,31 +0,0 @@ -%% Copyright (c) 2013, Loïc Hoguin <[email protected]> -%% -%% Permission to use, copy, modify, and/or distribute this software for any -%% purpose with or without fee is hereby granted, provided that the above -%% copyright notice and this permission notice appear in all copies. -%% -%% THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -%% WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -%% MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -%% ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -%% WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -%% ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF -%% OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - --module(eunit_SUITE). - --include_lib("common_test/include/ct.hrl"). - -%% ct. --export([all/0]). - -%% Tests. --export([eunit/1]). - -%% ct. - -all() -> - [eunit]. - -eunit(_) -> - ok = eunit:test({application, cowlib}). |