diff options
Diffstat (limited to 'lib/stdlib/src/filename.erl')
-rw-r--r-- | lib/stdlib/src/filename.erl | 449 |
1 files changed, 379 insertions, 70 deletions
diff --git a/lib/stdlib/src/filename.erl b/lib/stdlib/src/filename.erl index 008beb8b67..b7b7b562ab 100644 --- a/lib/stdlib/src/filename.erl +++ b/lib/stdlib/src/filename.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1997-2014. All Rights Reserved. +%% Copyright Ericsson AB 1997-2018. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -19,6 +19,9 @@ %% -module(filename). +-deprecated({find_src,1,next_major_release}). +-deprecated({find_src,2,next_major_release}). + %% Purpose: Provides generic manipulation of filenames. %% %% Generally, these functions accept filenames in the native format @@ -31,11 +34,46 @@ %% we flatten the arguments immediately on function entry as that makes %% it easier to ensure that the code works. +%% +%% *** Requirements on Raw Filename Format *** +%% +%% These requirements are due to the 'filename' module +%% in stdlib. This since it is documented that it +%% should be able to operate on raw filenames as well +%% as ordinary filenames. +%% +%% A raw filename *must* be a byte sequence where: +%% 1. Codepoints 0-127 (7-bit ascii) *must* be encoded +%% as a byte with the corresponding value. That is, +%% the most significant bit in the byte encoding the +%% codepoint is never set. +%% 2. Codepoints greater than 127 *must* be encoded +%% with the most significant bit set in *every* byte +%% encoding it. +%% +%% Latin1 and UTF-8 meet these requirements while +%% UTF-16 and UTF-32 don't. +%% +%% On Windows filenames are natively stored as malformed +%% UTF-16LE (lonely surrogates may appear). A more correct +%% description than UTF-16 would be an array of 16-bit +%% words... In order to meet the requirements of the +%% raw file format we convert the malformed UTF-16LE to +%% malformed UTF-8 which meet the requirements. +%% +%% Note that these requirements are today only OTP +%% internal (erts-stdlib internal) requirements that +%% could be changed. +%% + -export([absname/1, absname/2, absname_join/2, basename/1, basename/2, dirname/1, extension/1, join/1, join/2, pathtype/1, - rootname/1, rootname/2, split/1, nativename/1]). --export([find_src/1, find_src/2, flatten/1]). + rootname/1, rootname/2, split/1, flatten/1, nativename/1, + safe_relative_path/1]). +-export([find_src/1, find_src/2]). % deprecated +-export([basedir/2, basedir/3]). +-export([validate/1]). %% Undocumented and unsupported exports. -export([append/2]). @@ -139,6 +177,7 @@ absname_join(AbsBase, Name) -> -spec basename(Filename) -> file:filename_all() when Filename :: file:name_all(). + basename(Name) when is_binary(Name) -> case os:type() of {win32,_} -> @@ -433,6 +472,10 @@ join(Name1, Name2) when is_atom(Name2) -> join1([UcLetter, $:|Rest], RelativeName, [], win32) when is_integer(UcLetter), UcLetter >= $A, UcLetter =< $Z -> join1(Rest, RelativeName, [$:, UcLetter+$a-$A], win32); +join1([$\\,$\\|Rest], RelativeName, [], win32) -> + join1([$/,$/|Rest], RelativeName, [], win32); +join1([$/,$/|Rest], RelativeName, [], win32) -> + join1(Rest, RelativeName, [$/,$/], win32); join1([$\\|Rest], RelativeName, Result, win32) -> join1([$/|Rest], RelativeName, Result, win32); join1([$/|Rest], RelativeName, [$., $/|Result], OsType) -> @@ -461,6 +504,10 @@ join1([Atom|Rest], RelativeName, Result, OsType) when is_atom(Atom) -> join1b(<<UcLetter, $:, Rest/binary>>, RelativeName, [], win32) when is_integer(UcLetter), UcLetter >= $A, UcLetter =< $Z -> join1b(Rest, RelativeName, [$:, UcLetter+$a-$A], win32); +join1b(<<$\\,$\\,Rest/binary>>, RelativeName, [], win32) -> + join1b(<<$/,$/,Rest/binary>>, RelativeName, [], win32); +join1b(<<$/,$/,Rest/binary>>, RelativeName, [], win32) -> + join1b(Rest, RelativeName, [$/,$/], win32); join1b(<<$\\,Rest/binary>>, RelativeName, Result, win32) -> join1b(<<$/,Rest/binary>>, RelativeName, Result, win32); join1b(<<$/,Rest/binary>>, RelativeName, [$., $/|Result], OsType) -> @@ -471,6 +518,8 @@ join1b(<<>>, <<>>, Result, OsType) -> list_to_binary(maybe_remove_dirsep(Result, OsType)); join1b(<<>>, RelativeName, [$:|Rest], win32) -> join1b(RelativeName, <<>>, [$:|Rest], win32); +join1b(<<>>, RelativeName, [$/,$/|Result], win32) -> + join1b(RelativeName, <<>>, [$/,$/|Result], win32); join1b(<<>>, RelativeName, [$/|Result], OsType) -> join1b(RelativeName, <<>>, [$/|Result], OsType); join1b(<<>>, RelativeName, [$., $/|Result], OsType) -> @@ -484,6 +533,8 @@ maybe_remove_dirsep([$/, $:, Letter], win32) -> [Letter, $:, $/]; maybe_remove_dirsep([$/], _) -> [$/]; +maybe_remove_dirsep([$/,$/], win32) -> + [$/,$/]; maybe_remove_dirsep([$/|Name], _) -> lists:reverse(Name); maybe_remove_dirsep(Name, _) -> @@ -673,6 +724,9 @@ win32_splitb(<<Letter0,$:,Rest/binary>>) when ?IS_DRIVELETTER(Letter0) -> Letter = fix_driveletter(Letter0), L = binary:split(Rest,[<<"/">>,<<"\\">>],[global]), [<<Letter,$:>> | [ X || X <- L, X =/= <<>> ]]; +win32_splitb(<<Slash,Slash,Rest/binary>>) when ((Slash =:= $\\) orelse (Slash =:= $/)) -> + L = binary:split(Rest,[<<"/">>,<<"\\">>],[global]), + [<<"//">> | [ X || X <- L, X =/= <<>> ]]; win32_splitb(<<Slash,Rest/binary>>) when ((Slash =:= $\\) orelse (Slash =:= $/)) -> L = binary:split(Rest,[<<"/">>,<<"\\">>],[global]), [<<$/>> | [ X || X <- L, X =/= <<>> ]]; @@ -684,6 +738,8 @@ win32_splitb(Name) -> unix_split(Name) -> split(Name, [], unix). +win32_split([Slash,Slash|Rest]) when ((Slash =:= $\\) orelse (Slash =:= $/)) -> + split(Rest, [[$/,$/]], win32); win32_split([$\\|Rest]) -> win32_split([$/|Rest]); win32_split([X, $\\|Rest]) when is_integer(X) -> @@ -748,7 +804,45 @@ separators() -> _ -> {false, false} end. +-spec safe_relative_path(Filename) -> 'unsafe' | SafeFilename when + Filename :: file:name_all(), + SafeFilename :: file:name_all(). + +safe_relative_path(Path) -> + case pathtype(Path) of + relative -> + Cs0 = split(Path), + safe_relative_path_1(Cs0, []); + _ -> + unsafe + end. +safe_relative_path_1(["."|T], Acc) -> + safe_relative_path_1(T, Acc); +safe_relative_path_1([<<".">>|T], Acc) -> + safe_relative_path_1(T, Acc); +safe_relative_path_1([".."|T], Acc) -> + climb(T, Acc); +safe_relative_path_1([<<"..">>|T], Acc) -> + climb(T, Acc); +safe_relative_path_1([H|T], Acc) -> + safe_relative_path_1(T, [H|Acc]); +safe_relative_path_1([], []) -> + []; +safe_relative_path_1([], Acc) -> + join(lists:reverse(Acc)). + +climb(_, []) -> + unsafe; +climb(T, [_|Acc]) -> + safe_relative_path_1(T, Acc). + +%% NOTE: The find_src/1/2 functions are deprecated; they try to do too much +%% at once and are not a good fit for this module. Parts of the code have +%% been moved to filelib:find_file/2 instead. Only this part of this +%% module is allowed to call the filelib module; such mutual dependency +%% should otherwise be avoided! This code should eventually be removed. +%% %% find_src(Module) -- %% find_src(Module, Rules) -- @@ -791,14 +885,7 @@ separators() -> | {'d', atom()}, ErrorReason :: 'non_existing' | 'preloaded' | 'interpreted'. find_src(Mod) -> - Default = [{"", ""}, {"ebin", "src"}, {"ebin", "esrc"}], - Rules = - case application:get_env(kernel, source_search_rules) of - undefined -> Default; - {ok, []} -> Default; - {ok, R} when is_list(R) -> R - end, - find_src(Mod, Rules). + find_src(Mod, []). -spec find_src(Beam, Rules) -> {SourceFile, Options} | {error, {ErrorReason, Module}} when @@ -814,44 +901,47 @@ find_src(Mod) -> ErrorReason :: 'non_existing' | 'preloaded' | 'interpreted'. find_src(Mod, Rules) when is_atom(Mod) -> find_src(atom_to_list(Mod), Rules); -find_src(File0, Rules) when is_list(File0) -> - Mod = list_to_atom(basename(File0, ".erl")), - File = rootname(File0, ".erl"), - case readable_file(File++".erl") of - true -> - try_file(File, Mod, Rules); - false -> - try_file(undefined, Mod, Rules) - end. - -try_file(File, Mod, Rules) -> +find_src(ModOrFile, Rules) when is_list(ModOrFile) -> + Extension = ".erl", + Mod = list_to_atom(basename(ModOrFile, Extension)), case code:which(Mod) of Possibly_Rel_Path when is_list(Possibly_Rel_Path) -> - {ok, Cwd} = file:get_cwd(), - Path = join(Cwd, Possibly_Rel_Path), - try_file(File, Path, Mod, Rules); + {ok, Cwd} = file:get_cwd(), + ObjPath = make_abs_path(Cwd, Possibly_Rel_Path), + find_src_1(ModOrFile, ObjPath, Mod, Extension, Rules); Ecode when is_atom(Ecode) -> % Ecode :: ecode() {error, {Ecode, Mod}} end. %% At this point, the Mod is known to be valid. %% If the source name is not known, find it. -%% Then get the compilation options. -%% Returns: {SrcFile, Options} +find_src_1(ModOrFile, ObjPath, Mod, Extension, Rules) -> + %% The documentation says this function must return the found path + %% without extension in all cases. Also, ModOrFile could be given with + %% or without extension. Hence the calls to rootname below. + ModOrFileRoot = rootname(ModOrFile, Extension), + case filelib:is_regular(ModOrFileRoot++Extension) of + true -> + find_src_2(ModOrFileRoot, Mod); + false -> + SrcName = basename(ObjPath, code:objfile_extension()) ++ Extension, + case filelib:find_file(SrcName, dirname(ObjPath), Rules) of + {ok, SrcFile} -> + find_src_2(rootname(SrcFile, Extension), Mod); + Error -> + Error + end + end. -try_file(undefined, ObjFilename, Mod, Rules) -> - case get_source_file(ObjFilename, Mod, Rules) of - {ok, File} -> try_file(File, ObjFilename, Mod, Rules); - Error -> Error - end; -try_file(Src, _ObjFilename, Mod, _Rules) -> +%% Get the compilation options and return {SrcFileRoot, Options} +find_src_2(SrcRoot, Mod) -> List = case Mod:module_info(compile) of none -> []; List0 -> List0 end, Options = proplists:get_value(options, List, []), {ok, Cwd} = file:get_cwd(), - AbsPath = make_abs_path(Cwd, Src), + AbsPath = make_abs_path(Cwd, SrcRoot), {AbsPath, filter_options(dirname(AbsPath), Options, [])}. %% Filters the options. @@ -882,42 +972,6 @@ filter_options(Base, [_|Rest], Result) -> filter_options(_Base, [], Result) -> Result. -%% Gets the source file given path of object code and module name. - -get_source_file(Obj, Mod, Rules) -> - source_by_rules(dirname(Obj), atom_to_list(Mod), Rules). - -source_by_rules(Dir, Base, [{From, To}|Rest]) -> - case try_rule(Dir, Base, From, To) of - {ok, File} -> {ok, File}; - error -> source_by_rules(Dir, Base, Rest) - end; -source_by_rules(_Dir, _Base, []) -> - {error, source_file_not_found}. - -try_rule(Dir, Base, From, To) -> - case lists:suffix(From, Dir) of - true -> - NewDir = lists:sublist(Dir, 1, length(Dir)-length(From))++To, - Src = join(NewDir, Base), - case readable_file(Src++".erl") of - true -> {ok, Src}; - false -> error - end; - false -> - error - end. - -readable_file(File) -> - case file:read_file_info(File) of - {ok, #file_info{type=regular, access=read}} -> - true; - {ok, #file_info{type=regular, access=read_write}} -> - true; - _Other -> - false - end. - make_abs_path(BasePath, Path) -> join(BasePath, Path). @@ -954,3 +1008,258 @@ filename_string_to_binary(List) -> Bin end. +%% Application Base Directories +%% basedir +%% http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html + +-type basedir_path_type() :: 'user_cache' | 'user_config' | 'user_data' + | 'user_log'. +-type basedir_paths_type() :: 'site_config' | 'site_data'. + +-type basedir_opts() :: #{author => string() | binary(), + os => 'windows' | 'darwin' | 'linux', + version => string() | binary()}. + +-spec basedir(PathType,Application) -> file:filename_all() when + PathType :: basedir_path_type(), + Application :: string() | binary(); + (PathsType,Application) -> [file:filename_all()] when + PathsType :: basedir_paths_type(), + Application :: string() | binary(). + +basedir(Type,Application) when is_atom(Type), is_list(Application) orelse + is_binary(Application) -> + basedir(Type, Application, #{}). + +-spec basedir(PathType,Application,Opts) -> file:filename_all() when + PathType :: basedir_path_type(), + Application :: string() | binary(), + Opts :: basedir_opts(); + (PathsType,Application,Opts) -> [file:filename_all()] when + PathsType :: basedir_paths_type(), + Application :: string() | binary(), + Opts :: basedir_opts(). + +basedir(Type,Application,Opts) when is_atom(Type), is_map(Opts), + is_list(Application) orelse + is_binary(Application) -> + Os = basedir_os_from_opts(Opts), + Name = basedir_name_from_opts(Os,Application,Opts), + Base = basedir_from_os(Type,Os), + case {Type,Os} of + {user_log,linux} -> + filename:join([Base,Name,"log"]); + {user_log,windows} -> + filename:join([Base,Name,"Logs"]); + {user_cache,windows} -> + filename:join([Base,Name,"Cache"]); + {Type,_} when Type =:= site_config orelse Type =:= site_data -> + [filename:join([B,Name]) || B <- Base]; + _ -> + filename:join([Base,Name]) + end. + +basedir_os_from_opts(#{os := linux}) -> linux; +basedir_os_from_opts(#{os := windows}) -> windows; +basedir_os_from_opts(#{os := darwin}) -> darwin; +basedir_os_from_opts(#{}) -> basedir_os_type(). + +basedir_name_from_opts(windows,App,#{author:=Author,version:=Vsn}) -> + filename:join([Author,App,Vsn]); +basedir_name_from_opts(windows,App,#{author:=Author}) -> + filename:join([Author,App]); +basedir_name_from_opts(_,App,#{version:=Vsn}) -> + filename:join([App,Vsn]); +basedir_name_from_opts(_,App,_) -> + App. + +basedir_from_os(Type,Os) -> + case Os of + linux -> basedir_linux(Type); + darwin -> basedir_darwin(Type); + windows -> basedir_windows(Type) + end. + +-define(basedir_linux_user_data, ".local/share"). +-define(basedir_linux_user_config, ".config"). +-define(basedir_linux_user_cache, ".cache"). +-define(basedir_linux_user_log, ".cache"). %% .cache/App/log +-define(basedir_linux_site_data, "/usr/local/share/:/usr/share/"). +-define(basedir_linux_site_config, "/etc/xdg"). + +basedir_linux(Type) -> + case Type of + user_data -> getenv("XDG_DATA_HOME", ?basedir_linux_user_data, true); + user_config -> getenv("XDG_CONFIG_HOME",?basedir_linux_user_config,true); + user_cache -> getenv("XDG_CACHE_HOME", ?basedir_linux_user_cache, true); + user_log -> getenv("XDG_CACHE_HOME", ?basedir_linux_user_log, true); + site_data -> + Base = getenv("XDG_DATA_DIRS",?basedir_linux_site_data,false), + string:lexemes(Base, ":"); + site_config -> + Base = getenv("XDG_CONFIG_DIRS",?basedir_linux_site_config,false), + string:lexemes(Base, ":") + end. + +-define(basedir_darwin_user_data, "Library/Application Support"). +-define(basedir_darwin_user_config, "Library/Application Support"). +-define(basedir_darwin_user_cache, "Library/Caches"). +-define(basedir_darwin_user_log, "Library/Logs"). +-define(basedir_darwin_site_data, "/Library/Application Support"). +-define(basedir_darwin_site_config, "/Library/Application Support"). + +basedir_darwin(Type) -> + case Type of + user_data -> basedir_join_home(?basedir_darwin_user_data); + user_config -> basedir_join_home(?basedir_darwin_user_config); + user_cache -> basedir_join_home(?basedir_darwin_user_cache); + user_log -> basedir_join_home(?basedir_darwin_user_log); + site_data -> [?basedir_darwin_site_data]; + site_config -> [?basedir_darwin_site_config] + end. + +%% On Windows: +%% ex. C:\Users\egil\AppData\Local\Ericsson\Erlang +%% %LOCALAPPDATA% is defined on Windows 7 and onwards +%% %APPDATA% is used instead of %LOCALAPPDATA% if it's not defined. +%% %APPDATA% is used for roaming, i.e. for user_config on Windows 7 and beyond. +%% +%% user_data %LOCALAPPDATA%[/$author]/$appname[/$version] +%% user_config %APPDATA%[/$author]/$appname[/$version] +%% user_cache %LOCALAPPDATA%[/$author]/$appname[/$version]/Cache +%% user_log %LOCALAPPDATA%[/$author]/$appname[/$version]/Logs + +-define(basedir_windows_user_data, "Local"). +-define(basedir_windows_user_config, "Roaming"). +-define(basedir_windows_user_cache, "Local"). %% Cache is added later +-define(basedir_windows_user_log, "Local"). %% Logs is added later + +basedir_windows(Type) -> + %% If LOCALAPPDATA is not defined we are likely on an + %% XP machine. Use APPDATA instead. + case basedir_windows_appdata() of + noappdata -> + %% No AppData is set + %% Probably running MSYS + case Type of + user_data -> basedir_join_home(?basedir_windows_user_data); + user_config -> basedir_join_home(?basedir_windows_user_config); + user_cache -> basedir_join_home(?basedir_windows_user_cache); + user_log -> basedir_join_home(?basedir_windows_user_log); + site_data -> []; + site_config -> [] + end; + {ok, AppData} -> + case Type of + user_data -> getenv("LOCALAPPDATA", AppData); + user_config -> AppData; + user_cache -> getenv("LOCALAPPDATA", AppData); + user_log -> getenv("LOCALAPPDATA", AppData); + site_data -> []; + site_config -> [] + end + end. + +basedir_windows_appdata() -> + case os:getenv("APPDATA") of + Invalid when Invalid =:= false orelse Invalid =:= [] -> + noappdata; + Val -> + {ok, Val} + end. + +%% basedir aux + +getenv(K,Def,false) -> getenv(K,Def); +getenv(K,Def,true) -> getenv(K,basedir_join_home(Def)). + +getenv(K,Def) -> + case os:getenv(K) of + [] -> Def; + false -> Def; + Val -> Val + end. + +basedir_join_home(Dir) -> + case os:getenv("HOME") of + false -> + {ok,[[Home]]} = init:get_argument(home), + filename:join(Home,Dir); + Home -> filename:join(Home,Dir) + end. + +basedir_os_type() -> + case os:type() of + {unix,darwin} -> darwin; + {win32,_} -> windows; + _ -> linux + end. + +%% +%% validate/1 +%% + +-spec validate(FileName) -> boolean() when + FileName :: file:name_all(). + +validate(FileName) when is_binary(FileName) -> + %% Raw filename... + validate_bin(FileName); +validate(FileName) when is_list(FileName); + is_atom(FileName) -> + validate_list(FileName, + file:native_name_encoding(), + os:type()). + +validate_list(FileName, Enc, Os) -> + try + true = validate_list(FileName, Enc, Os, 0) > 0 + catch + _ : _ -> false + end. + +validate_list([], _Enc, _Os, Chars) -> + Chars; +validate_list(C, Enc, Os, Chars) when is_integer(C) -> + validate_char(C, Enc, Os), + Chars+1; +validate_list(A, Enc, Os, Chars) when is_atom(A) -> + validate_list(atom_to_list(A), Enc, Os, Chars); +validate_list([H|T], Enc, Os, Chars) -> + NewChars = validate_list(H, Enc, Os, Chars), + validate_list(T, Enc, Os, NewChars). + +%% C is always an integer... +% validate_char(C, _, _) when not is_integer(C) -> +% throw(invalid); +validate_char(C, _, _) when C < 1 -> + throw(invalid); %% No negative or null characters... +validate_char(C, latin1, _) when C > 255 -> + throw(invalid); +validate_char(C, utf8, _) when C >= 16#110000 -> + throw(invalid); +validate_char(C, utf8, {win32, _}) when C > 16#ffff -> + throw(invalid); %% invalid win wchar... +validate_char(_C, utf8, {win32, _}) -> + ok; %% Range below is accepted on windows... +validate_char(C, utf8, _) when 16#D800 =< C, C =< 16#DFFF -> + throw(invalid); %% invalid unicode range... +validate_char(_, _, _) -> + ok. + +validate_bin(Bin) -> + %% Raw filename. That is, we do not interpret + %% the encoding, but we still do not accept + %% null characters... + try + true = validate_bin(Bin, 0) > 0 + catch + _ : _ -> false + end. + +validate_bin(<<>>, Bs) -> + Bs; +validate_bin(<<0, _Rest/binary>>, _Bs) -> + throw(invalid); %% No null characters allowed... +validate_bin(<<_B, Rest/binary>>, Bs) -> + validate_bin(Rest, Bs+1). |